linux/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c

4099 lines
108 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018, Intel Corporation. */
#include "ice.h"
#include "ice_base.h"
#include "ice_lib.h"
#include "ice_fltr.h"
/**
 * ice_validate_vf_id - check that a VF ID refers to an allocated VF
 * @pf: pointer to the PF structure
 * @vf_id: the ID of the VF to check
 *
 * Returns 0 when @vf_id is below pf->num_alloc_vfs. Otherwise an error is
 * logged against the PF's device and -EINVAL is returned.
 */
static int ice_validate_vf_id(struct ice_pf *pf, u16 vf_id)
{
	/* vf_id is unsigned, so only the upper bound has to be checked */
	if (vf_id < pf->num_alloc_vfs)
		return 0;

	dev_err(ice_pf_to_dev(pf), "Invalid VF ID: %u\n", vf_id);

	return -EINVAL;
}
/**
 * ice_check_vf_init - verify that a VF has its INIT state bit set
 * @pf: pointer to the PF structure (used for logging)
 * @vf: the pointer to the VF to check
 *
 * Returns 0 when ICE_VF_STATE_INIT is set in vf->vf_states. Otherwise logs
 * that the VF is in reset and returns -EBUSY.
 */
static int ice_check_vf_init(struct ice_pf *pf, struct ice_vf *vf)
{
	if (test_bit(ICE_VF_STATE_INIT, vf->vf_states))
		return 0;

	dev_err(ice_pf_to_dev(pf), "VF ID: %u in reset. Try again.\n",
		vf->vf_id);

	return -EBUSY;
}
/**
* ice_err_to_virt_err - translate errors for VF return code
* @ice_err: error return code
*/
static enum virtchnl_status_code ice_err_to_virt_err(enum ice_status ice_err)
{
switch (ice_err) {
case ICE_SUCCESS:
return VIRTCHNL_STATUS_SUCCESS;
case ICE_ERR_BAD_PTR:
case ICE_ERR_INVAL_SIZE:
case ICE_ERR_DEVICE_NOT_SUPPORTED:
case ICE_ERR_PARAM:
case ICE_ERR_CFG:
return VIRTCHNL_STATUS_ERR_PARAM;
case ICE_ERR_NO_MEMORY:
return VIRTCHNL_STATUS_ERR_NO_MEMORY;
case ICE_ERR_NOT_READY:
case ICE_ERR_RESET_FAILED:
case ICE_ERR_FW_API_VER:
case ICE_ERR_AQ_ERROR:
case ICE_ERR_AQ_TIMEOUT:
case ICE_ERR_AQ_FULL:
case ICE_ERR_AQ_NO_WORK:
case ICE_ERR_AQ_EMPTY:
return VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
default:
return VIRTCHNL_STATUS_ERR_NOT_SUPPORTED;
}
}
/**
 * ice_vc_vf_broadcast - send the same virtchnl message to every enabled VF
 * @pf: pointer to the PF structure
 * @v_opcode: operation code
 * @v_retval: return value
 * @msg: pointer to the msg buffer
 * @msglen: msg length
 *
 * A VF receives the message only if its INIT or ACTIVE state bit is set.
 */
static void
ice_vc_vf_broadcast(struct ice_pf *pf, enum virtchnl_ops v_opcode,
		    enum virtchnl_status_code v_retval, u8 *msg, u16 msglen)
{
	unsigned int vf_idx;

	ice_for_each_vf(pf, vf_idx) {
		struct ice_vf *cur_vf = &pf->vf[vf_idx];

		/* Only VFs that are initialized or active are enabled */
		if (test_bit(ICE_VF_STATE_INIT, cur_vf->vf_states) ||
		    test_bit(ICE_VF_STATE_ACTIVE, cur_vf->vf_states)) {
			/* The result is deliberately not checked: one VF
			 * failing must not stop delivery to the others.
			 */
			ice_aq_send_msg_to_vf(&pf->hw, cur_vf->vf_id,
					      v_opcode, v_retval, msg, msglen,
					      NULL);
		}
	}
}
/**
 * ice_set_pfe_link - fill in link speed/status of a virtchnl_pf_event
 * @vf: pointer to the VF structure
 * @pfe: pointer to the virtchnl_pf_event to set link speed/status for
 * @ice_link_speed: link speed specified by ICE_AQ_LINK_SPEED_*
 * @link_up: whether or not to set the link up/down
 *
 * VFs that advertised VIRTCHNL_VF_CAP_ADV_LINK_SPEED get the link_event_adv
 * layout; all others get the legacy link_event layout.
 */
static void
ice_set_pfe_link(struct ice_vf *vf, struct virtchnl_pf_event *pfe,
		 int ice_link_speed, bool link_up)
{
	if (!(vf->driver_caps & VIRTCHNL_VF_CAP_ADV_LINK_SPEED)) {
		/* Legacy method for virtchnl link speeds */
		pfe->event_data.link_event.link_status = link_up;
		pfe->event_data.link_event.link_speed =
			(enum virtchnl_link_speed)
			ice_conv_link_speed_to_virtchnl(false, ice_link_speed);
		return;
	}

	pfe->event_data.link_event_adv.link_status = link_up;
	/* Speed in Mbps */
	pfe->event_data.link_event_adv.link_speed =
		ice_conv_link_speed_to_virtchnl(true, ice_link_speed);
}
/**
 * ice_vf_has_no_qs_ena - report whether a VF has no queues enabled at all
 * @vf: the VF to check
 *
 * Returns true only when both the rxq_ena and txq_ena bitmaps have no bit set
 * within the first ICE_MAX_RSS_QS_PER_VF positions; returns false otherwise.
 */
static bool ice_vf_has_no_qs_ena(struct ice_vf *vf)
{
	/* any enabled Rx queue already settles the answer */
	if (bitmap_weight(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF))
		return false;

	return !bitmap_weight(vf->txq_ena, ICE_MAX_RSS_QS_PER_VF);
}
/**
 * ice_is_vf_link_up - decide whether a VF should see its link as up
 * @vf: VF to check if link is up
 *
 * The link is reported down when the VF fails the init check or has no
 * queues enabled. Otherwise a forced link state (vf->link_forced) takes
 * precedence, and without one the ICE_AQ_LINK_UP bit of the port's link
 * info is used.
 */
static bool ice_is_vf_link_up(struct ice_vf *vf)
{
	struct ice_pf *pf = vf->pf;

	if (ice_check_vf_init(pf, vf) || ice_vf_has_no_qs_ena(vf))
		return false;

	if (vf->link_forced)
		return vf->link_up;

	return pf->hw.port_info->phy.link_info.link_info & ICE_AQ_LINK_UP;
}
/**
* ice_vc_notify_vf_link_state - Inform a VF of link status
* @vf: pointer to the VF structure
*
* send a link status message to a single VF
*/
static void ice_vc_notify_vf_link_state(struct ice_vf *vf)
{
struct virtchnl_pf_event pfe = { 0 };
struct ice_hw *hw = &vf->pf->hw;
pfe.event = VIRTCHNL_EVENT_LINK_CHANGE;
pfe.severity = PF_EVENT_SEVERITY_INFO;
if (ice_is_vf_link_up(vf))
ice_set_pfe_link(vf, &pfe,
hw->port_info->phy.link_info.link_speed, true);
else
ice_set_pfe_link(vf, &pfe, ICE_AQ_LINK_SPEED_UNKNOWN, false);
ice_aq_send_msg_to_vf(hw, vf->vf_id, VIRTCHNL_OP_EVENT,
VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe,
sizeof(pfe), NULL);
}
/**
 * ice_vf_invalidate_vsi - mark a VF as no longer owning a VSI
 * @vf: VF to remove access to VSI for
 *
 * Both the VSI number and the VSI index stored in the VF are set to
 * ICE_NO_VSI.
 */
static void ice_vf_invalidate_vsi(struct ice_vf *vf)
{
	vf->lan_vsi_num = ICE_NO_VSI;
	vf->lan_vsi_idx = ICE_NO_VSI;
}
/**
 * ice_vf_vsi_release - release a VF's VSI, then invalidate the VF's handles
 * @vf: VF whose VSI is released and then invalidated
 *
 * The VSI is looked up in the PF's VSI array via vf->lan_vsi_idx.
 */
static void ice_vf_vsi_release(struct ice_vf *vf)
{
	struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];

	ice_vsi_release(vsi);
	ice_vf_invalidate_vsi(vf);
}
/**
 * ice_free_vf_res - release the resources held by a single VF
 * @vf: pointer to the VF info
 *
 * Clears the VF's INIT state, releases its VSI (if it has one), wipes its MDD
 * event counters, writes GLINT_DYN_CTL_CLEARPBA_M to each of its interrupt
 * vectors and finally clears its promiscuous state bits.
 */
static void ice_free_vf_res(struct ice_vf *vf)
{
	struct ice_pf *pf = vf->pf;
	struct ice_hw *hw = &pf->hw;
	int vector, end_vector;

	/* Disable the VF's configuration API first so the OS cannot reach
	 * the VF's VSI once it has been freed or invalidated.
	 */
	clear_bit(ICE_VF_STATE_INIT, vf->vf_states);

	/* free VSI and disconnect it from the parent uplink */
	if (vf->lan_vsi_idx != ICE_NO_VSI) {
		ice_vf_vsi_release(vf);
		vf->num_mac = 0;
	}

	/* one past the last vector that belongs to this VF */
	end_vector = vf->first_vector_idx + pf->num_msix_per_vf;

	/* clear VF MDD event information */
	memset(&vf->mdd_tx_events, 0, sizeof(vf->mdd_tx_events));
	memset(&vf->mdd_rx_events, 0, sizeof(vf->mdd_rx_events));

	/* Disable interrupts so that VF starts in a known state */
	for (vector = vf->first_vector_idx; vector < end_vector; vector++) {
		wr32(hw, GLINT_DYN_CTL(vector), GLINT_DYN_CTL_CLEARPBA_M);
		ice_flush(hw);
	}

	/* reset some of the state variables keeping track of the resources */
	clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states);
	clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states);
}
/**
* ice_dis_vf_mappings
* @vf: pointer to the VF structure
*/
static void ice_dis_vf_mappings(struct ice_vf *vf)
{
struct ice_pf *pf = vf->pf;
struct ice_vsi *vsi;
struct device *dev;
int first, last, v;
struct ice_hw *hw;
hw = &pf->hw;
vsi = pf->vsi[vf->lan_vsi_idx];
dev = ice_pf_to_dev(pf);
wr32(hw, VPINT_ALLOC(vf->vf_id), 0);
wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), 0);
ice: Refactor interrupt tracking Currently we have two MSI-x (IRQ) trackers, one for OS requested MSI-x entries (sw_irq_tracker) and one for hardware MSI-x vectors (hw_irq_tracker). Generally the sw_irq_tracker has less entries than the hw_irq_tracker because the hw_irq_tracker has entries equal to the max allowed MSI-x per PF and the sw_irq_tracker is mainly the minimum (non SR-IOV portion of the vectors, kernel granted IRQs). All of the non SR-IOV portions of the driver (i.e. LAN queues, RDMA queues, OICR, etc.) take at least one of each type of tracker resource. SR-IOV only grabs entries from the hw_irq_tracker. There are a few issues with this approach that can be seen when doing any kind of device reconfiguration (i.e. ethtool -L, SR-IOV, etc.). One of them being, any time the driver creates an ice_q_vector and associates it to a LAN queue pair it will grab and use one entry from the hw_irq_tracker and one from the sw_irq_tracker. If the indices on these does not match it will cause a Tx timeout, which will cause a reset and then the indices will match up again and traffic will resume. The mismatched indices come from the trackers not being the same size and/or the search_hint in the two trackers not being equal. Another reason for the refactor is the co-existence of features with SR-IOV. If SR-IOV is enabled and the interrupts are taken from the end of the sw_irq_tracker then other features can no longer use this space because the hardware has now given the remaining interrupts to SR-IOV. This patch reworks how we track MSI-x vectors by removing the hw_irq_tracker completely and instead MSI-x resources needed for SR-IOV are determined all at once instead of per VF. This can be done because when creating VFs we know how many are wanted and how many MSI-x vectors each VF needs. This also allows us to start using MSI-x resources from the end of the PF's allowed MSI-x vectors so we are less likely to use entries needed for other features (i.e. 
RDMA, L2 Offload, etc). This patch also reworks the ice_res_tracker structure by removing the search_hint and adding a new member - "end". Instead of having a search_hint we will always search from 0. The new member, "end", will be used to manipulate the end of the ice_res_tracker (specifically sw_irq_tracker) during runtime based on MSI-x vectors needed by SR-IOV. In the normal case, the end of ice_res_tracker will be equal to the ice_res_tracker's num_entries. The sriov_base_vector member was added to the PF structure. It is used to represent the starting MSI-x index of all the needed MSI-x vectors for all SR-IOV VFs. Depending on how many MSI-x are needed, SR-IOV may have to take resources from the sw_irq_tracker. This is done by setting the sw_irq_tracker->end equal to the pf->sriov_base_vector. When all SR-IOV VFs are removed then the sw_irq_tracker->end is reset back to sw_irq_tracker->num_entries. The sriov_base_vector, along with the VF's number of MSI-x (pf->num_vf_msix), vf_id, and the base MSI-x index on the PF (pf->hw.func_caps.common_cap.msix_vector_first_id), is used to calculate the first HW absolute MSI-x index for each VF, which is used to write to the VPINT_ALLOC[_PCI] and GLINT_VECT2FUNC registers to program the VFs MSI-x PCI configuration bits. Also, the sriov_base_vector is used along with VF's num_vf_msix, vf_id, and q_vector->v_idx to determine the MSI-x register index (used for writing to GLINT_DYN_CTL) within the PF's space. Interrupt changes removed any references to hw_base_vector, hw_oicr_idx, and hw_irq_tracker. Only sw_base_vector, sw_oicr_idx, and sw_irq_tracker variables remain. Change all of these by removing the "sw_" prefix to help avoid confusion with these variables and their use. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-04-16 17:30:44 +00:00
first = vf->first_vector_idx;
last = first + pf->num_msix_per_vf - 1;
for (v = first; v <= last; v++) {
u32 reg;
reg = (((1 << GLINT_VECT2FUNC_IS_PF_S) &
GLINT_VECT2FUNC_IS_PF_M) |
((hw->pf_id << GLINT_VECT2FUNC_PF_NUM_S) &
GLINT_VECT2FUNC_PF_NUM_M));
wr32(hw, GLINT_VECT2FUNC(v), reg);
}
if (vsi->tx_mapping_mode == ICE_VSI_MAP_CONTIG)
wr32(hw, VPLAN_TX_QBASE(vf->vf_id), 0);
else
dev_err(dev, "Scattered mode for VF Tx queues is not yet implemented\n");
if (vsi->rx_mapping_mode == ICE_VSI_MAP_CONTIG)
wr32(hw, VPLAN_RX_QBASE(vf->vf_id), 0);
else
dev_err(dev, "Scattered mode for VF Rx queues is not yet implemented\n");
}
ice: Refactor interrupt tracking Currently we have two MSI-x (IRQ) trackers, one for OS requested MSI-x entries (sw_irq_tracker) and one for hardware MSI-x vectors (hw_irq_tracker). Generally the sw_irq_tracker has less entries than the hw_irq_tracker because the hw_irq_tracker has entries equal to the max allowed MSI-x per PF and the sw_irq_tracker is mainly the minimum (non SR-IOV portion of the vectors, kernel granted IRQs). All of the non SR-IOV portions of the driver (i.e. LAN queues, RDMA queues, OICR, etc.) take at least one of each type of tracker resource. SR-IOV only grabs entries from the hw_irq_tracker. There are a few issues with this approach that can be seen when doing any kind of device reconfiguration (i.e. ethtool -L, SR-IOV, etc.). One of them being, any time the driver creates an ice_q_vector and associates it to a LAN queue pair it will grab and use one entry from the hw_irq_tracker and one from the sw_irq_tracker. If the indices on these does not match it will cause a Tx timeout, which will cause a reset and then the indices will match up again and traffic will resume. The mismatched indices come from the trackers not being the same size and/or the search_hint in the two trackers not being equal. Another reason for the refactor is the co-existence of features with SR-IOV. If SR-IOV is enabled and the interrupts are taken from the end of the sw_irq_tracker then other features can no longer use this space because the hardware has now given the remaining interrupts to SR-IOV. This patch reworks how we track MSI-x vectors by removing the hw_irq_tracker completely and instead MSI-x resources needed for SR-IOV are determined all at once instead of per VF. This can be done because when creating VFs we know how many are wanted and how many MSI-x vectors each VF needs. This also allows us to start using MSI-x resources from the end of the PF's allowed MSI-x vectors so we are less likely to use entries needed for other features (i.e. 
RDMA, L2 Offload, etc). This patch also reworks the ice_res_tracker structure by removing the search_hint and adding a new member - "end". Instead of having a search_hint we will always search from 0. The new member, "end", will be used to manipulate the end of the ice_res_tracker (specifically sw_irq_tracker) during runtime based on MSI-x vectors needed by SR-IOV. In the normal case, the end of ice_res_tracker will be equal to the ice_res_tracker's num_entries. The sriov_base_vector member was added to the PF structure. It is used to represent the starting MSI-x index of all the needed MSI-x vectors for all SR-IOV VFs. Depending on how many MSI-x are needed, SR-IOV may have to take resources from the sw_irq_tracker. This is done by setting the sw_irq_tracker->end equal to the pf->sriov_base_vector. When all SR-IOV VFs are removed then the sw_irq_tracker->end is reset back to sw_irq_tracker->num_entries. The sriov_base_vector, along with the VF's number of MSI-x (pf->num_vf_msix), vf_id, and the base MSI-x index on the PF (pf->hw.func_caps.common_cap.msix_vector_first_id), is used to calculate the first HW absolute MSI-x index for each VF, which is used to write to the VPINT_ALLOC[_PCI] and GLINT_VECT2FUNC registers to program the VFs MSI-x PCI configuration bits. Also, the sriov_base_vector is used along with VF's num_vf_msix, vf_id, and q_vector->v_idx to determine the MSI-x register index (used for writing to GLINT_DYN_CTL) within the PF's space. Interrupt changes removed any references to hw_base_vector, hw_oicr_idx, and hw_irq_tracker. Only sw_base_vector, sw_oicr_idx, and sw_irq_tracker variables remain. Change all of these by removing the "sw_" prefix to help avoid confusion with these variables and their use. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-04-16 17:30:44 +00:00
/**
 * ice_sriov_free_msix_res - Reset/free any used MSIX resources
 * @pf: pointer to the PF structure
 *
 * Since no MSIX entries are taken from the pf->irq_tracker then just clear
 * the pf->sriov_base_vector.
 *
 * Returns 0 on success, and -EINVAL on error (@pf or pf->irq_tracker is
 * NULL).
 */
static int ice_sriov_free_msix_res(struct ice_pf *pf)
{
	struct ice_res_tracker *res;

	if (!pf)
		return -EINVAL;

	res = pf->irq_tracker;
	if (!res)
		return -EINVAL;

	/* give back irq_tracker resources used; warn when the SR-IOV base
	 * vector is below the tracker's number of entries
	 */
	WARN_ON(pf->sriov_base_vector < res->num_entries);

	pf->sriov_base_vector = 0;

	return 0;
}
/**
 * ice_set_vf_state_qs_dis - record that all of a VF's queues are disabled
 * @vf: pointer to the VF structure
 *
 * Zeroes the per-VF Rx and Tx enabled-queue bitmaps and clears the
 * ICE_VF_STATE_QS_ENA state bit.
 */
void ice_set_vf_state_qs_dis(struct ice_vf *vf)
{
	/* forget every queue that was marked enabled */
	bitmap_zero(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF);
	bitmap_zero(vf->txq_ena, ICE_MAX_RSS_QS_PER_VF);

	clear_bit(ICE_VF_STATE_QS_ENA, vf->vf_states);
}
/**
 * ice_dis_vf_qs - stop a VF's Tx and Rx rings and mark its queues disabled
 * @vf: pointer to the VF structure
 *
 * Operates on the VSI found in the PF's VSI array at vf->lan_vsi_idx.
 */
static void ice_dis_vf_qs(struct ice_vf *vf)
{
	struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];

	ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id);
	ice_vsi_stop_all_rx_rings(vsi);

	ice_set_vf_state_qs_dis(vf);
}
/**
* ice_free_vfs - Free all VFs
* @pf: pointer to the PF structure
*/
void ice_free_vfs(struct ice_pf *pf)
{
struct device *dev = ice_pf_to_dev(pf);
struct ice_hw *hw = &pf->hw;
unsigned int tmp, i;
if (!pf->vf)
return;
while (test_and_set_bit(__ICE_VF_DIS, pf->state))
usleep_range(1000, 2000);
/* Disable IOV before freeing resources. This lets any VF drivers
* running in the host get themselves cleaned up before we yank
* the carpet out from underneath their feet.
*/
if (!pci_vfs_assigned(pf->pdev))
pci_disable_sriov(pf->pdev);
else
dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n");
ice: Fix removing driver while bare-metal VFs pass traffic Currently, if there are bare-metal VFs passing traffic and the ice driver is removed, there is a possibility of VFs triggering a Tx timeout right before iavf_remove(). This is causing iavf_close() to not be called because there is a check in the beginning of iavf_remove() that bails out early if (adapter->state < IAVF_DOWN_PENDING). This makes it so some resources do not get cleaned up. Specifically, free_irq() is never called for data interrupts, which results in the following line of code to trigger: pci_disable_msix() free_msi_irqs() ... BUG_ON(irq_has_action(entry->irq + i)); ... To prevent the Tx timeout from occurring on the VF during driver unload for ice and the iavf there are a few changes that are needed. [1] Don't disable all active VF Tx/Rx queues prior to calling pci_disable_sriov. [2] Call ice_free_vfs() before disabling the service task. [3] Disable VF resets when the ice driver is being unloaded by setting the pf->state flag __ICE_VF_RESETS_DISABLED. Changing [1] and [2] allow each VF driver's remove flow to successfully send VIRTCHNL requests, which includes queue disable. This prevents unexpected Tx timeouts because the PF driver is no longer forcefully disabling queues. Due to [1] and [2] there is a possibility that the PF driver will get a VFLR or reset request over VIRTCHNL from a VF during PF driver unload. Prevent that by doing [3]. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2020-02-27 18:14:55 +00:00
/* Avoid wait time by stopping all VFs at the same time */
ice_for_each_vf(pf, i)
if (test_bit(ICE_VF_STATE_QS_ENA, pf->vf[i].vf_states))
ice_dis_vf_qs(&pf->vf[i]);
tmp = pf->num_alloc_vfs;
pf->num_qps_per_vf = 0;
pf->num_alloc_vfs = 0;
for (i = 0; i < tmp; i++) {
if (test_bit(ICE_VF_STATE_INIT, pf->vf[i].vf_states)) {
/* disable VF qp mappings and set VF disable state */
ice_dis_vf_mappings(&pf->vf[i]);
set_bit(ICE_VF_STATE_DIS, pf->vf[i].vf_states);
ice_free_vf_res(&pf->vf[i]);
}
}
ice: Refactor interrupt tracking Currently we have two MSI-x (IRQ) trackers, one for OS requested MSI-x entries (sw_irq_tracker) and one for hardware MSI-x vectors (hw_irq_tracker). Generally the sw_irq_tracker has less entries than the hw_irq_tracker because the hw_irq_tracker has entries equal to the max allowed MSI-x per PF and the sw_irq_tracker is mainly the minimum (non SR-IOV portion of the vectors, kernel granted IRQs). All of the non SR-IOV portions of the driver (i.e. LAN queues, RDMA queues, OICR, etc.) take at least one of each type of tracker resource. SR-IOV only grabs entries from the hw_irq_tracker. There are a few issues with this approach that can be seen when doing any kind of device reconfiguration (i.e. ethtool -L, SR-IOV, etc.). One of them being, any time the driver creates an ice_q_vector and associates it to a LAN queue pair it will grab and use one entry from the hw_irq_tracker and one from the sw_irq_tracker. If the indices on these does not match it will cause a Tx timeout, which will cause a reset and then the indices will match up again and traffic will resume. The mismatched indices come from the trackers not being the same size and/or the search_hint in the two trackers not being equal. Another reason for the refactor is the co-existence of features with SR-IOV. If SR-IOV is enabled and the interrupts are taken from the end of the sw_irq_tracker then other features can no longer use this space because the hardware has now given the remaining interrupts to SR-IOV. This patch reworks how we track MSI-x vectors by removing the hw_irq_tracker completely and instead MSI-x resources needed for SR-IOV are determined all at once instead of per VF. This can be done because when creating VFs we know how many are wanted and how many MSI-x vectors each VF needs. This also allows us to start using MSI-x resources from the end of the PF's allowed MSI-x vectors so we are less likely to use entries needed for other features (i.e. 
RDMA, L2 Offload, etc). This patch also reworks the ice_res_tracker structure by removing the search_hint and adding a new member - "end". Instead of having a search_hint we will always search from 0. The new member, "end", will be used to manipulate the end of the ice_res_tracker (specifically sw_irq_tracker) during runtime based on MSI-x vectors needed by SR-IOV. In the normal case, the end of ice_res_tracker will be equal to the ice_res_tracker's num_entries. The sriov_base_vector member was added to the PF structure. It is used to represent the starting MSI-x index of all the needed MSI-x vectors for all SR-IOV VFs. Depending on how many MSI-x are needed, SR-IOV may have to take resources from the sw_irq_tracker. This is done by setting the sw_irq_tracker->end equal to the pf->sriov_base_vector. When all SR-IOV VFs are removed then the sw_irq_tracker->end is reset back to sw_irq_tracker->num_entries. The sriov_base_vector, along with the VF's number of MSI-x (pf->num_vf_msix), vf_id, and the base MSI-x index on the PF (pf->hw.func_caps.common_cap.msix_vector_first_id), is used to calculate the first HW absolute MSI-x index for each VF, which is used to write to the VPINT_ALLOC[_PCI] and GLINT_VECT2FUNC registers to program the VFs MSI-x PCI configuration bits. Also, the sriov_base_vector is used along with VF's num_vf_msix, vf_id, and q_vector->v_idx to determine the MSI-x register index (used for writing to GLINT_DYN_CTL) within the PF's space. Interrupt changes removed any references to hw_base_vector, hw_oicr_idx, and hw_irq_tracker. Only sw_base_vector, sw_oicr_idx, and sw_irq_tracker variables remain. Change all of these by removing the "sw_" prefix to help avoid confusion with these variables and their use. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-04-16 17:30:44 +00:00
if (ice_sriov_free_msix_res(pf))
dev_err(dev, "Failed to free MSIX resources used by SR-IOV\n");
ice: Refactor interrupt tracking Currently we have two MSI-x (IRQ) trackers, one for OS requested MSI-x entries (sw_irq_tracker) and one for hardware MSI-x vectors (hw_irq_tracker). Generally the sw_irq_tracker has less entries than the hw_irq_tracker because the hw_irq_tracker has entries equal to the max allowed MSI-x per PF and the sw_irq_tracker is mainly the minimum (non SR-IOV portion of the vectors, kernel granted IRQs). All of the non SR-IOV portions of the driver (i.e. LAN queues, RDMA queues, OICR, etc.) take at least one of each type of tracker resource. SR-IOV only grabs entries from the hw_irq_tracker. There are a few issues with this approach that can be seen when doing any kind of device reconfiguration (i.e. ethtool -L, SR-IOV, etc.). One of them being, any time the driver creates an ice_q_vector and associates it to a LAN queue pair it will grab and use one entry from the hw_irq_tracker and one from the sw_irq_tracker. If the indices on these does not match it will cause a Tx timeout, which will cause a reset and then the indices will match up again and traffic will resume. The mismatched indices come from the trackers not being the same size and/or the search_hint in the two trackers not being equal. Another reason for the refactor is the co-existence of features with SR-IOV. If SR-IOV is enabled and the interrupts are taken from the end of the sw_irq_tracker then other features can no longer use this space because the hardware has now given the remaining interrupts to SR-IOV. This patch reworks how we track MSI-x vectors by removing the hw_irq_tracker completely and instead MSI-x resources needed for SR-IOV are determined all at once instead of per VF. This can be done because when creating VFs we know how many are wanted and how many MSI-x vectors each VF needs. This also allows us to start using MSI-x resources from the end of the PF's allowed MSI-x vectors so we are less likely to use entries needed for other features (i.e. 
RDMA, L2 Offload, etc). This patch also reworks the ice_res_tracker structure by removing the search_hint and adding a new member - "end". Instead of having a search_hint we will always search from 0. The new member, "end", will be used to manipulate the end of the ice_res_tracker (specifically sw_irq_tracker) during runtime based on MSI-x vectors needed by SR-IOV. In the normal case, the end of ice_res_tracker will be equal to the ice_res_tracker's num_entries. The sriov_base_vector member was added to the PF structure. It is used to represent the starting MSI-x index of all the needed MSI-x vectors for all SR-IOV VFs. Depending on how many MSI-x are needed, SR-IOV may have to take resources from the sw_irq_tracker. This is done by setting the sw_irq_tracker->end equal to the pf->sriov_base_vector. When all SR-IOV VFs are removed then the sw_irq_tracker->end is reset back to sw_irq_tracker->num_entries. The sriov_base_vector, along with the VF's number of MSI-x (pf->num_vf_msix), vf_id, and the base MSI-x index on the PF (pf->hw.func_caps.common_cap.msix_vector_first_id), is used to calculate the first HW absolute MSI-x index for each VF, which is used to write to the VPINT_ALLOC[_PCI] and GLINT_VECT2FUNC registers to program the VFs MSI-x PCI configuration bits. Also, the sriov_base_vector is used along with VF's num_vf_msix, vf_id, and q_vector->v_idx to determine the MSI-x register index (used for writing to GLINT_DYN_CTL) within the PF's space. Interrupt changes removed any references to hw_base_vector, hw_oicr_idx, and hw_irq_tracker. Only sw_base_vector, sw_oicr_idx, and sw_irq_tracker variables remain. Change all of these by removing the "sw_" prefix to help avoid confusion with these variables and their use. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-04-16 17:30:44 +00:00
devm_kfree(dev, pf->vf);
pf->vf = NULL;
/* This check is for when the driver is unloaded while VFs are
* assigned. Setting the number of VFs to 0 through sysfs is caught
* before this function ever gets called.
*/
if (!pci_vfs_assigned(pf->pdev)) {
unsigned int vf_id;
/* Acknowledge VFLR for all VFs. Without this, VFs will fail to
* work correctly when SR-IOV gets re-enabled.
*/
for (vf_id = 0; vf_id < tmp; vf_id++) {
u32 reg_idx, bit_idx;
reg_idx = (hw->func_caps.vf_base_id + vf_id) / 32;
bit_idx = (hw->func_caps.vf_base_id + vf_id) % 32;
wr32(hw, GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx));
}
}
clear_bit(__ICE_VF_DIS, pf->state);
clear_bit(ICE_FLAG_SRIOV_ENA, pf->flags);
}
/**
 * ice_trigger_vf_reset - Start a hardware reset of a single VF
 * @vf: pointer to the VF structure
 * @is_vflr: true if the reset was caused by a VFLR, false otherwise
 * @is_pfr: true if the reset follows a previous PFR
 *
 * Kicks off the reset of one VF in hardware. The caller is expected to wait
 * long enough for hardware to finish resetting the VF before it cleans up and
 * restores VF functionality.
 */
static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr)
{
	struct ice_pf *pf = vf->pf;
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_hw *hw = &pf->hw;
	unsigned int abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id;
	unsigned int attempt = 0;
	u32 val;

	/* Flag the VF as no longer active; this acts as a warning to the VF */
	clear_bit(ICE_VF_STATE_ACTIVE, vf->vf_states);

	/* Keep the VF's configuration API disabled while the reset is in
	 * progress. The flag is set again once the VF's VSI is safe to access.
	 */
	clear_bit(ICE_VF_STATE_INIT, vf->vf_states);

	/* A PFR clears VF_MBX_ARQLEN on its own, so only the VFR case needs
	 * the driver to clear it. Clearing it for a PFR can disturb VF resets
	 * because the VF driver may already have started its cleanup by now.
	 */
	if (!is_pfr)
		wr32(hw, VF_MBX_ARQLEN(vf->vf_id), 0);

	/* With a VFLR the hardware has already reset the VF and only cleanup
	 * is left, so VPGEN_VFRTRIG is written for non-VFLR resets only.
	 */
	if (!is_vflr) {
		val = rd32(hw, VPGEN_VFRTRIG(vf->vf_id));
		wr32(hw, VPGEN_VFRTRIG(vf->vf_id), val | VPGEN_VFRTRIG_VFSWR_M);
	}

	/* clear the VFLR bit in GLGEN_VFLRSTAT: absolute VF ID / 32 selects
	 * the register, absolute VF ID % 32 selects the bit
	 */
	wr32(hw, GLGEN_VFLRSTAT(abs_vf_id / 32), BIT(abs_vf_id % 32));
	ice_flush(hw);

	/* point PF_PCI_CIAA at this VF's device status, then poll PF_PCI_CIAD
	 * until no transactions are pending or the poll budget is used up
	 */
	wr32(hw, PF_PCI_CIAA,
	     VF_DEVICE_STATUS | (abs_vf_id << PF_PCI_CIAA_VF_NUM_S));
	while (attempt < ICE_PCI_CIAD_WAIT_COUNT &&
	       (rd32(hw, PF_PCI_CIAD) & VF_TRANS_PENDING_M) != 0) {
		dev_err(dev, "VF %u PCI transactions stuck\n", vf->vf_id);
		udelay(ICE_PCI_CIAD_WAIT_DELAY_US);
		attempt++;
	}
}
/**
 * ice_vsi_manage_pvid - Turn the port VLAN of a VSI on or off
 * @vsi: the VSI to update
 * @pvid_info: VLAN ID and QoS used to set the PVID VSI context field
 * @enable: true to enable the PVID, false to disable it
 *
 * Returns 0 on success, -ENOMEM if the temporary VSI context cannot be
 * allocated, or -EIO if the VSI update fails.
 */
static int ice_vsi_manage_pvid(struct ice_vsi *vsi, u16 pvid_info, bool enable)
{
	struct ice_hw *hw = &vsi->back->hw;
	struct ice_aqc_vsi_props *props;
	struct ice_vsi_ctx *ctxt;
	enum ice_status status;
	int err = 0;

	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
	if (!ctxt)
		return -ENOMEM;

	/* start from the cached VSI properties and override what changes */
	ctxt->info = vsi->info;
	props = &ctxt->info;

	if (!enable) {
		props->vlan_flags = ICE_AQ_VSI_VLAN_EMOD_NOTHING |
				    ICE_AQ_VSI_VLAN_MODE_ALL;
		props->sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
	} else {
		props->vlan_flags = ICE_AQ_VSI_VLAN_MODE_UNTAGGED |
				    ICE_AQ_VSI_PVLAN_INSERT_PVID |
				    ICE_AQ_VSI_VLAN_EMOD_STR;
		props->sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
	}

	props->pvid = cpu_to_le16(pvid_info);
	props->valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID |
					    ICE_AQ_VSI_PROP_SW_VALID);

	status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
	if (!status) {
		/* only mirror the new settings once the update succeeded */
		vsi->info.vlan_flags = props->vlan_flags;
		vsi->info.sw_flags2 = props->sw_flags2;
		vsi->info.pvid = props->pvid;
	} else {
		dev_info(ice_hw_to_dev(hw), "update VSI for port VLAN failed, err %s aq_err %s\n",
			 ice_stat_str(status),
			 ice_aq_str(hw->adminq.sq_last_status));
		err = -EIO;
	}

	kfree(ctxt);
	return err;
}
/**
* ice_vf_get_port_info - Get the VF's port info structure
* @vf: VF used to get the port info structure for
*/
static struct ice_port_info *ice_vf_get_port_info(struct ice_vf *vf)
{
return vf->pf->hw.port_info;
}
/**
 * ice_vf_vsi_setup - Create the VSI for a VF
 * @vf: VF to set up a VSI for
 *
 * On success the VSI's index and number are recorded in @vf and a pointer to
 * the new VSI is returned. On failure an error is logged,
 * ice_vf_invalidate_vsi() is called for @vf and NULL is returned.
 */
static struct ice_vsi *ice_vf_vsi_setup(struct ice_vf *vf)
{
	struct ice_pf *pf = vf->pf;
	struct ice_vsi *vsi;

	vsi = ice_vsi_setup(pf, ice_vf_get_port_info(vf), ICE_VSI_VF,
			    vf->vf_id);
	if (vsi) {
		vf->lan_vsi_idx = vsi->idx;
		vf->lan_vsi_num = vsi->vsi_num;
		return vsi;
	}

	dev_err(ice_pf_to_dev(pf), "Failed to create VF VSI\n");
	ice_vf_invalidate_vsi(vf);

	return NULL;
}
ice: Refactor interrupt tracking Currently we have two MSI-x (IRQ) trackers, one for OS requested MSI-x entries (sw_irq_tracker) and one for hardware MSI-x vectors (hw_irq_tracker). Generally the sw_irq_tracker has less entries than the hw_irq_tracker because the hw_irq_tracker has entries equal to the max allowed MSI-x per PF and the sw_irq_tracker is mainly the minimum (non SR-IOV portion of the vectors, kernel granted IRQs). All of the non SR-IOV portions of the driver (i.e. LAN queues, RDMA queues, OICR, etc.) take at least one of each type of tracker resource. SR-IOV only grabs entries from the hw_irq_tracker. There are a few issues with this approach that can be seen when doing any kind of device reconfiguration (i.e. ethtool -L, SR-IOV, etc.). One of them being, any time the driver creates an ice_q_vector and associates it to a LAN queue pair it will grab and use one entry from the hw_irq_tracker and one from the sw_irq_tracker. If the indices on these does not match it will cause a Tx timeout, which will cause a reset and then the indices will match up again and traffic will resume. The mismatched indices come from the trackers not being the same size and/or the search_hint in the two trackers not being equal. Another reason for the refactor is the co-existence of features with SR-IOV. If SR-IOV is enabled and the interrupts are taken from the end of the sw_irq_tracker then other features can no longer use this space because the hardware has now given the remaining interrupts to SR-IOV. This patch reworks how we track MSI-x vectors by removing the hw_irq_tracker completely and instead MSI-x resources needed for SR-IOV are determined all at once instead of per VF. This can be done because when creating VFs we know how many are wanted and how many MSI-x vectors each VF needs. This also allows us to start using MSI-x resources from the end of the PF's allowed MSI-x vectors so we are less likely to use entries needed for other features (i.e. 
RDMA, L2 Offload, etc). This patch also reworks the ice_res_tracker structure by removing the search_hint and adding a new member - "end". Instead of having a search_hint we will always search from 0. The new member, "end", will be used to manipulate the end of the ice_res_tracker (specifically sw_irq_tracker) during runtime based on MSI-x vectors needed by SR-IOV. In the normal case, the end of ice_res_tracker will be equal to the ice_res_tracker's num_entries. The sriov_base_vector member was added to the PF structure. It is used to represent the starting MSI-x index of all the needed MSI-x vectors for all SR-IOV VFs. Depending on how many MSI-x are needed, SR-IOV may have to take resources from the sw_irq_tracker. This is done by setting the sw_irq_tracker->end equal to the pf->sriov_base_vector. When all SR-IOV VFs are removed then the sw_irq_tracker->end is reset back to sw_irq_tracker->num_entries. The sriov_base_vector, along with the VF's number of MSI-x (pf->num_vf_msix), vf_id, and the base MSI-x index on the PF (pf->hw.func_caps.common_cap.msix_vector_first_id), is used to calculate the first HW absolute MSI-x index for each VF, which is used to write to the VPINT_ALLOC[_PCI] and GLINT_VECT2FUNC registers to program the VFs MSI-x PCI configuration bits. Also, the sriov_base_vector is used along with VF's num_vf_msix, vf_id, and q_vector->v_idx to determine the MSI-x register index (used for writing to GLINT_DYN_CTL) within the PF's space. Interrupt changes removed any references to hw_base_vector, hw_oicr_idx, and hw_irq_tracker. Only sw_base_vector, sw_oicr_idx, and sw_irq_tracker variables remain. Change all of these by removing the "sw_" prefix to help avoid confusion with these variables and their use. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-04-16 17:30:44 +00:00
/**
 * ice_calc_vf_first_vector_idx - Calculate MSIX vector index in the PF space
 * @pf: pointer to PF structure
 * @vf: pointer to VF that the first MSIX vector index is being calculated for
 *
 * This returns the first MSIX vector index in PF space that is used by this VF.
 * This index is used when accessing PF relative registers such as
 * GLINT_VECT2FUNC and GLINT_DYN_CTL.
 * This will always be the OICR index in the AVF driver so any functionality
 * using vf->first_vector_idx for queue configuration will have to increment by
 * 1 to avoid meddling with the OICR index.
 *
 * The index is the PF's SR-IOV base vector plus @vf's ID multiplied by the
 * number of MSIX vectors given to each VF.
 */
static int ice_calc_vf_first_vector_idx(struct ice_pf *pf, struct ice_vf *vf)
{
	return pf->sriov_base_vector + vf->vf_id * pf->num_msix_per_vf;
}
/**
 * ice_vf_rebuild_host_vlan_cfg - add VLAN 0 filter or rebuild the Port VLAN
 * @vf: VF to restore the host VLAN configuration for
 *
 * Called after a VF VSI has been re-added/rebuilt during reset. When the VF
 * has a port VLAN, the PVID is programmed into the VSI again and a filter for
 * that VLAN is added; otherwise a VLAN 0 filter is added.
 *
 * Returns 0 on success or the error of the step that failed.
 */
static int ice_vf_rebuild_host_vlan_cfg(struct ice_vf *vf)
{
	struct device *dev = ice_pf_to_dev(vf->pf);
	struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
	u16 vid = 0;
	int ret;

	if (vf->port_vlan_info) {
		ret = ice_vsi_manage_pvid(vsi, vf->port_vlan_info, true);
		if (ret) {
			dev_err(dev, "failed to configure port VLAN via VSI parameters for VF %u, error %d\n",
				vf->vf_id, ret);
			return ret;
		}

		vid = vf->port_vlan_info & VLAN_VID_MASK;
	}

	/* at this point vid is the port VLAN number, or 0 without one */
	ret = ice_vsi_add_vlan(vsi, vid, ICE_FWD_TO_VSI);
	if (ret)
		dev_err(dev, "failed to add %s VLAN %u filter for VF %u, error %d\n",
			vf->port_vlan_info ? "port" : "", vid, vf->vf_id,
			ret);

	return ret;
}
/**
 * ice_vf_rebuild_host_mac_cfg - add broadcast and the VF's perm_addr/LAA
 * @vf: VF to add MAC filters for
 *
 * Called after a VF VSI has been re-added/rebuilt during reset. A broadcast
 * filter is always added; the VF's default LAN address is added as well when
 * it is a valid Ethernet address. vf->num_mac is incremented for every filter
 * that was added.
 *
 * Returns 0 on success or the errno translation of the failing ice_status.
 */
static int ice_vf_rebuild_host_mac_cfg(struct ice_vf *vf)
{
	struct device *dev = ice_pf_to_dev(vf->pf);
	struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
	u8 bcast_addr[ETH_ALEN];
	enum ice_status status;

	eth_broadcast_addr(bcast_addr);

	status = ice_fltr_add_mac(vsi, bcast_addr, ICE_FWD_TO_VSI);
	if (status) {
		dev_err(dev, "failed to add broadcast MAC filter for VF %u, error %s\n",
			vf->vf_id, ice_stat_str(status));
		return ice_status_to_errno(status);
	}
	vf->num_mac++;

	/* without a valid default address there is no unicast filter to add */
	if (!is_valid_ether_addr(vf->dflt_lan_addr.addr))
		return 0;

	status = ice_fltr_add_mac(vsi, vf->dflt_lan_addr.addr, ICE_FWD_TO_VSI);
	if (status) {
		dev_err(dev, "failed to add default unicast MAC filter %pM for VF %u, error %s\n",
			vf->dflt_lan_addr.addr, vf->vf_id,
			ice_stat_str(status));
		return ice_status_to_errno(status);
	}
	vf->num_mac++;

	return 0;
}
/**
 * ice_vf_set_host_trust_cfg - set trust setting based on pre-reset value
 * @vf: VF to configure trust setting for
 *
 * Makes the ICE_VIRTCHNL_VF_CAP_PRIVILEGE bit in vf->vf_caps follow
 * vf->trusted.
 */
static void ice_vf_set_host_trust_cfg(struct ice_vf *vf)
{
	if (!vf->trusted) {
		clear_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps);
		return;
	}

	set_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps);
}
/**
 * ice_ena_vf_msix_mappings - enable VF MSIX mappings in hardware
 * @vf: VF to enable MSIX mappings for
 *
 * Some of the registers are indexed/configured with hardware global (device
 * based) values while others take 0-based values relative to the PF, so both
 * forms of the VF's vector range and VF ID are computed here.
 */
static void ice_ena_vf_msix_mappings(struct ice_vf *vf)
{
	struct ice_pf *pf = vf->pf;
	struct ice_hw *hw = &pf->hw;
	int pf_first, pf_last, dev_first, dev_last;
	int abs_vf_id, vec;
	u32 val;

	/* vector range of this VF relative to the PF */
	pf_first = vf->first_vector_idx;
	pf_last = (pf_first + pf->num_msix_per_vf) - 1;

	/* same range shifted by the PF's first device-wide vector ID */
	dev_first = pf_first + hw->func_caps.common_cap.msix_vector_first_id;
	dev_last = (dev_first + pf->num_msix_per_vf) - 1;

	abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id;

	val = (dev_first << VPINT_ALLOC_FIRST_S) & VPINT_ALLOC_FIRST_M;
	val |= (dev_last << VPINT_ALLOC_LAST_S) & VPINT_ALLOC_LAST_M;
	val |= VPINT_ALLOC_VALID_M;
	wr32(hw, VPINT_ALLOC(vf->vf_id), val);

	val = (dev_first << VPINT_ALLOC_PCI_FIRST_S) & VPINT_ALLOC_PCI_FIRST_M;
	val |= (dev_last << VPINT_ALLOC_PCI_LAST_S) & VPINT_ALLOC_PCI_LAST_M;
	val |= VPINT_ALLOC_PCI_VALID_M;
	wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), val);

	/* map the interrupts to its functions; every vector of the VF gets
	 * the same VF/PF number pair, so the value is built once
	 */
	val = (abs_vf_id << GLINT_VECT2FUNC_VF_NUM_S) &
	      GLINT_VECT2FUNC_VF_NUM_M;
	val |= (hw->pf_id << GLINT_VECT2FUNC_PF_NUM_S) &
	       GLINT_VECT2FUNC_PF_NUM_M;
	for (vec = pf_first; vec <= pf_last; vec++)
		wr32(hw, GLINT_VECT2FUNC(vec), val);

	/* Map mailbox interrupt to VF MSI-X vector 0 */
	wr32(hw, VPINT_MBX_CTL(abs_vf_id), VPINT_MBX_CTL_CAUSE_ENA_M);
}
/**
 * ice_ena_vf_q_mappings - enable Rx/Tx queue mappings for a VF
 * @vf: VF to enable the mappings for
 * @max_txq: max Tx queues allowed on the VF's VSI
 * @max_rxq: max Rx queues allowed on the VF's VSI
 *
 * The queue range registers are only programmed for contiguous mapping mode;
 * any other mode is reported as an error and the range register is left
 * alone. The map-enable registers are written in either case.
 */
static void ice_ena_vf_q_mappings(struct ice_vf *vf, u16 max_txq, u16 max_rxq)
{
	struct ice_hw *hw = &vf->pf->hw;
	struct device *dev = ice_pf_to_dev(vf->pf);
	struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
	u32 val;

	/* Tx map enable is written no matter which mapping mode is in use */
	wr32(hw, VPLAN_TXQ_MAPENA(vf->vf_id), VPLAN_TXQ_MAPENA_TX_ENA_M);

	/* VF Tx queues allocation */
	if (vsi->tx_mapping_mode != ICE_VSI_MAP_CONTIG) {
		dev_err(dev, "Scattered mode for VF Tx queues is not yet implemented\n");
	} else {
		/* Program the VF's Tx queue range within the PF. VFNUMQ holds
		 * (number of queues - 1): 0 means 1 queue, 255 means 256.
		 */
		val = (vsi->txq_map[0] << VPLAN_TX_QBASE_VFFIRSTQ_S) &
		      VPLAN_TX_QBASE_VFFIRSTQ_M;
		val |= ((max_txq - 1) << VPLAN_TX_QBASE_VFNUMQ_S) &
		       VPLAN_TX_QBASE_VFNUMQ_M;
		wr32(hw, VPLAN_TX_QBASE(vf->vf_id), val);
	}

	/* Rx map enable is written no matter which mapping mode is in use */
	wr32(hw, VPLAN_RXQ_MAPENA(vf->vf_id), VPLAN_RXQ_MAPENA_RX_ENA_M);

	/* VF Rx queues allocation */
	if (vsi->rx_mapping_mode != ICE_VSI_MAP_CONTIG) {
		dev_err(dev, "Scattered mode for VF Rx queues is not yet implemented\n");
	} else {
		/* Program the VF's Rx queue range within the PF. VFNUMQ holds
		 * (number of queues - 1): 0 means 1 queue, 255 means 256.
		 */
		val = (vsi->rxq_map[0] << VPLAN_RX_QBASE_VFFIRSTQ_S) &
		      VPLAN_RX_QBASE_VFFIRSTQ_M;
		val |= ((max_rxq - 1) << VPLAN_RX_QBASE_VFNUMQ_S) &
		       VPLAN_RX_QBASE_VFNUMQ_M;
		wr32(hw, VPLAN_RX_QBASE(vf->vf_id), val);
	}
}
/**
 * ice_ena_vf_mappings - enable VF MSIX and queue mapping
 * @vf: pointer to the VF structure
 *
 * Programs the MSIX mappings first, then the Tx/Rx queue mappings using the
 * queue counts allocated on the VF's LAN VSI.
 */
static void ice_ena_vf_mappings(struct ice_vf *vf)
{
	struct ice_vsi *vsi;

	ice_ena_vf_msix_mappings(vf);

	vsi = vf->pf->vsi[vf->lan_vsi_idx];
	ice_ena_vf_q_mappings(vf, vsi->alloc_txq, vsi->alloc_rxq);
}
/**
 * ice_determine_res - find a per-VF resource count that fits all VFs
 * @pf: pointer to the PF structure
 * @avail_res: available resources in the PF structure
 * @max_res: maximum resources that can be given per VF
 * @min_res: minimum resources that can be given per VF
 *
 * Returns non-zero value if resources (queues/vectors) are available or
 * returns zero if PF cannot accommodate for all num_alloc_vfs.
 */
static int
ice_determine_res(struct ice_pf *pf, u16 avail_res, u16 max_res, u16 min_res)
{
	int res = max_res;

	/* Start with the maximum per-VF amount and check whether the PF can
	 * hand that many resources to all num_alloc_vfs. If it can, that is
	 * the answer. If not, halve the amount (rounding up) and check again,
	 * down to min_res. Zero is returned once even min_res does not fit.
	 */
	while (res >= min_res) {
		int next;

		if (pf->num_alloc_vfs * res <= avail_res)
			return res;

		/* the minimum itself was rejected, nothing smaller is allowed */
		if (res == min_res)
			break;

		next = DIV_ROUND_UP(res, 2);

		/* Halving can step over min_res (e.g. 16, 8, 4, 2 with a
		 * min_res of 3); clamp so the minimum is still tried before
		 * giving up.
		 */
		if (next < min_res)
			next = min_res;

		/* DIV_ROUND_UP(1, 2) is 1 again; stop rather than spin when
		 * the candidate can no longer shrink
		 */
		if (next == res)
			break;

		res = next;
	}

	return 0;
}
ice: Refactor interrupt tracking Currently we have two MSI-x (IRQ) trackers, one for OS requested MSI-x entries (sw_irq_tracker) and one for hardware MSI-x vectors (hw_irq_tracker). Generally the sw_irq_tracker has less entries than the hw_irq_tracker because the hw_irq_tracker has entries equal to the max allowed MSI-x per PF and the sw_irq_tracker is mainly the minimum (non SR-IOV portion of the vectors, kernel granted IRQs). All of the non SR-IOV portions of the driver (i.e. LAN queues, RDMA queues, OICR, etc.) take at least one of each type of tracker resource. SR-IOV only grabs entries from the hw_irq_tracker. There are a few issues with this approach that can be seen when doing any kind of device reconfiguration (i.e. ethtool -L, SR-IOV, etc.). One of them being, any time the driver creates an ice_q_vector and associates it to a LAN queue pair it will grab and use one entry from the hw_irq_tracker and one from the sw_irq_tracker. If the indices on these does not match it will cause a Tx timeout, which will cause a reset and then the indices will match up again and traffic will resume. The mismatched indices come from the trackers not being the same size and/or the search_hint in the two trackers not being equal. Another reason for the refactor is the co-existence of features with SR-IOV. If SR-IOV is enabled and the interrupts are taken from the end of the sw_irq_tracker then other features can no longer use this space because the hardware has now given the remaining interrupts to SR-IOV. This patch reworks how we track MSI-x vectors by removing the hw_irq_tracker completely and instead MSI-x resources needed for SR-IOV are determined all at once instead of per VF. This can be done because when creating VFs we know how many are wanted and how many MSI-x vectors each VF needs. This also allows us to start using MSI-x resources from the end of the PF's allowed MSI-x vectors so we are less likely to use entries needed for other features (i.e. 
RDMA, L2 Offload, etc). This patch also reworks the ice_res_tracker structure by removing the search_hint and adding a new member - "end". Instead of having a search_hint we will always search from 0. The new member, "end", will be used to manipulate the end of the ice_res_tracker (specifically sw_irq_tracker) during runtime based on MSI-x vectors needed by SR-IOV. In the normal case, the end of ice_res_tracker will be equal to the ice_res_tracker's num_entries. The sriov_base_vector member was added to the PF structure. It is used to represent the starting MSI-x index of all the needed MSI-x vectors for all SR-IOV VFs. Depending on how many MSI-x are needed, SR-IOV may have to take resources from the sw_irq_tracker. This is done by setting the sw_irq_tracker->end equal to the pf->sriov_base_vector. When all SR-IOV VFs are removed then the sw_irq_tracker->end is reset back to sw_irq_tracker->num_entries. The sriov_base_vector, along with the VF's number of MSI-x (pf->num_vf_msix), vf_id, and the base MSI-x index on the PF (pf->hw.func_caps.common_cap.msix_vector_first_id), is used to calculate the first HW absolute MSI-x index for each VF, which is used to write to the VPINT_ALLOC[_PCI] and GLINT_VECT2FUNC registers to program the VFs MSI-x PCI configuration bits. Also, the sriov_base_vector is used along with VF's num_vf_msix, vf_id, and q_vector->v_idx to determine the MSI-x register index (used for writing to GLINT_DYN_CTL) within the PF's space. Interrupt changes removed any references to hw_base_vector, hw_oicr_idx, and hw_irq_tracker. Only sw_base_vector, sw_oicr_idx, and sw_irq_tracker variables remain. Change all of these by removing the "sw_" prefix to help avoid confusion with these variables and their use. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-04-16 17:30:44 +00:00
/**
* ice_calc_vf_reg_idx - Calculate the VF's register index in the PF space
* @vf: VF to calculate the register index for
* @q_vector: a q_vector associated to the VF
*/
int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector)
{
struct ice_pf *pf;
if (!vf || !q_vector)
return -EINVAL;
pf = vf->pf;
/* always add one to account for the OICR being the first MSIX */
return pf->sriov_base_vector + pf->num_msix_per_vf * vf->vf_id +
ice: Refactor interrupt tracking Currently we have two MSI-x (IRQ) trackers, one for OS requested MSI-x entries (sw_irq_tracker) and one for hardware MSI-x vectors (hw_irq_tracker). Generally the sw_irq_tracker has less entries than the hw_irq_tracker because the hw_irq_tracker has entries equal to the max allowed MSI-x per PF and the sw_irq_tracker is mainly the minimum (non SR-IOV portion of the vectors, kernel granted IRQs). All of the non SR-IOV portions of the driver (i.e. LAN queues, RDMA queues, OICR, etc.) take at least one of each type of tracker resource. SR-IOV only grabs entries from the hw_irq_tracker. There are a few issues with this approach that can be seen when doing any kind of device reconfiguration (i.e. ethtool -L, SR-IOV, etc.). One of them being, any time the driver creates an ice_q_vector and associates it to a LAN queue pair it will grab and use one entry from the hw_irq_tracker and one from the sw_irq_tracker. If the indices on these does not match it will cause a Tx timeout, which will cause a reset and then the indices will match up again and traffic will resume. The mismatched indices come from the trackers not being the same size and/or the search_hint in the two trackers not being equal. Another reason for the refactor is the co-existence of features with SR-IOV. If SR-IOV is enabled and the interrupts are taken from the end of the sw_irq_tracker then other features can no longer use this space because the hardware has now given the remaining interrupts to SR-IOV. This patch reworks how we track MSI-x vectors by removing the hw_irq_tracker completely and instead MSI-x resources needed for SR-IOV are determined all at once instead of per VF. This can be done because when creating VFs we know how many are wanted and how many MSI-x vectors each VF needs. This also allows us to start using MSI-x resources from the end of the PF's allowed MSI-x vectors so we are less likely to use entries needed for other features (i.e. 
RDMA, L2 Offload, etc). This patch also reworks the ice_res_tracker structure by removing the search_hint and adding a new member - "end". Instead of having a search_hint we will always search from 0. The new member, "end", will be used to manipulate the end of the ice_res_tracker (specifically sw_irq_tracker) during runtime based on MSI-x vectors needed by SR-IOV. In the normal case, the end of ice_res_tracker will be equal to the ice_res_tracker's num_entries. The sriov_base_vector member was added to the PF structure. It is used to represent the starting MSI-x index of all the needed MSI-x vectors for all SR-IOV VFs. Depending on how many MSI-x are needed, SR-IOV may have to take resources from the sw_irq_tracker. This is done by setting the sw_irq_tracker->end equal to the pf->sriov_base_vector. When all SR-IOV VFs are removed then the sw_irq_tracker->end is reset back to sw_irq_tracker->num_entries. The sriov_base_vector, along with the VF's number of MSI-x (pf->num_vf_msix), vf_id, and the base MSI-x index on the PF (pf->hw.func_caps.common_cap.msix_vector_first_id), is used to calculate the first HW absolute MSI-x index for each VF, which is used to write to the VPINT_ALLOC[_PCI] and GLINT_VECT2FUNC registers to program the VFs MSI-x PCI configuration bits. Also, the sriov_base_vector is used along with VF's num_vf_msix, vf_id, and q_vector->v_idx to determine the MSI-x register index (used for writing to GLINT_DYN_CTL) within the PF's space. Interrupt changes removed any references to hw_base_vector, hw_oicr_idx, and hw_irq_tracker. Only sw_base_vector, sw_oicr_idx, and sw_irq_tracker variables remain. Change all of these by removing the "sw_" prefix to help avoid confusion with these variables and their use. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-04-16 17:30:44 +00:00
q_vector->v_idx + 1;
}
/**
 * ice_get_max_valid_res_idx - find the highest in-use index of a tracker
 * @res: resource tracker to scan
 *
 * Walks res->list backwards from the last entry and stops at the first entry
 * that has ICE_RES_VALID_BIT set. Intended for SR-IOV only: it is the sole
 * consumer that adjusts res->end, and this is always called while res->end
 * equals res->num_entries.
 *
 * Return: the index of the last valid entry, 0 when no entry is valid, or
 * -EINVAL when @res is NULL.
 */
static int ice_get_max_valid_res_idx(struct ice_res_tracker *res)
{
	int idx;

	if (!res)
		return -EINVAL;

	idx = res->num_entries - 1;
	while (idx >= 0) {
		if (res->list[idx] & ICE_RES_VALID_BIT)
			return idx;
		idx--;
	}

	/* nothing in the tracker is marked valid */
	return 0;
}
/**
 * ice_sriov_set_msix_res - place the SR-IOV MSIX range at the end of the PF's
 * @pf: pointer to PF structure
 * @num_msix_needed: number of MSIX vectors needed for all SR-IOV VFs
 *
 * SR-IOV vectors are taken from the end of the PF's allowed HW MSIX vectors
 * so that the irq_tracker is not affected. On success the only state changed
 * is pf->sriov_base_vector, which is set to the first vector of that range.
 *
 * Running short of vectors here is not expected; ice_set_per_vf_res() should
 * always catch that case first.
 *
 * Return: 0 on success, or -EINVAL when the PF's space does not have enough
 * MSIX vectors available for SR-IOV.
 */
static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed)
{
	u16 hw_vectors = pf->hw.func_caps.common_cap.num_msix_vectors;
	int tracker_entries = pf->irq_tracker->num_entries;
	int base_vector = hw_vectors - num_msix_needed;

	/* the SR-IOV range may only start at or beyond the end of the
	 * irq_tracker entries; anything lower means there are not enough
	 * MSIX vectors available
	 */
	if (base_vector >= tracker_entries) {
		pf->sriov_base_vector = base_vector;
		return 0;
	}

	return -EINVAL;
}
/**
* ice_set_per_vf_res - check if vectors and queues are available
* @pf: pointer to the PF structure
*
* First, determine HW interrupts from common pool. If we allocate fewer VFs, we
* get more vectors and can enable more queues per VF. Note that this does not
* grab any vectors from the SW pool already allocated. Also note, that all
* vector counts include one for each VF's miscellaneous interrupt vector
* (i.e. OICR).
*
* Minimum VFs - 2 vectors, 1 queue pair
* Small VFs - 5 vectors, 4 queue pairs
* Medium VFs - 17 vectors, 16 queue pairs
*
* Second, determine number of queue pairs per VF by starting with a pre-defined
* maximum each VF supports. If this is not possible, then we adjust based on
* queue pairs available on the device.
*
* Lastly, set queue and MSI-X VF variables tracked by the PF so it can be used
* by each VF during VF initialization and reset.
*/
static int ice_set_per_vf_res(struct ice_pf *pf)
{
ice: Refactor interrupt tracking Currently we have two MSI-x (IRQ) trackers, one for OS requested MSI-x entries (sw_irq_tracker) and one for hardware MSI-x vectors (hw_irq_tracker). Generally the sw_irq_tracker has less entries than the hw_irq_tracker because the hw_irq_tracker has entries equal to the max allowed MSI-x per PF and the sw_irq_tracker is mainly the minimum (non SR-IOV portion of the vectors, kernel granted IRQs). All of the non SR-IOV portions of the driver (i.e. LAN queues, RDMA queues, OICR, etc.) take at least one of each type of tracker resource. SR-IOV only grabs entries from the hw_irq_tracker. There are a few issues with this approach that can be seen when doing any kind of device reconfiguration (i.e. ethtool -L, SR-IOV, etc.). One of them being, any time the driver creates an ice_q_vector and associates it to a LAN queue pair it will grab and use one entry from the hw_irq_tracker and one from the sw_irq_tracker. If the indices on these does not match it will cause a Tx timeout, which will cause a reset and then the indices will match up again and traffic will resume. The mismatched indices come from the trackers not being the same size and/or the search_hint in the two trackers not being equal. Another reason for the refactor is the co-existence of features with SR-IOV. If SR-IOV is enabled and the interrupts are taken from the end of the sw_irq_tracker then other features can no longer use this space because the hardware has now given the remaining interrupts to SR-IOV. This patch reworks how we track MSI-x vectors by removing the hw_irq_tracker completely and instead MSI-x resources needed for SR-IOV are determined all at once instead of per VF. This can be done because when creating VFs we know how many are wanted and how many MSI-x vectors each VF needs. This also allows us to start using MSI-x resources from the end of the PF's allowed MSI-x vectors so we are less likely to use entries needed for other features (i.e. 
RDMA, L2 Offload, etc). This patch also reworks the ice_res_tracker structure by removing the search_hint and adding a new member - "end". Instead of having a search_hint we will always search from 0. The new member, "end", will be used to manipulate the end of the ice_res_tracker (specifically sw_irq_tracker) during runtime based on MSI-x vectors needed by SR-IOV. In the normal case, the end of ice_res_tracker will be equal to the ice_res_tracker's num_entries. The sriov_base_vector member was added to the PF structure. It is used to represent the starting MSI-x index of all the needed MSI-x vectors for all SR-IOV VFs. Depending on how many MSI-x are needed, SR-IOV may have to take resources from the sw_irq_tracker. This is done by setting the sw_irq_tracker->end equal to the pf->sriov_base_vector. When all SR-IOV VFs are removed then the sw_irq_tracker->end is reset back to sw_irq_tracker->num_entries. The sriov_base_vector, along with the VF's number of MSI-x (pf->num_vf_msix), vf_id, and the base MSI-x index on the PF (pf->hw.func_caps.common_cap.msix_vector_first_id), is used to calculate the first HW absolute MSI-x index for each VF, which is used to write to the VPINT_ALLOC[_PCI] and GLINT_VECT2FUNC registers to program the VFs MSI-x PCI configuration bits. Also, the sriov_base_vector is used along with VF's num_vf_msix, vf_id, and q_vector->v_idx to determine the MSI-x register index (used for writing to GLINT_DYN_CTL) within the PF's space. Interrupt changes removed any references to hw_base_vector, hw_oicr_idx, and hw_irq_tracker. Only sw_base_vector, sw_oicr_idx, and sw_irq_tracker variables remain. Change all of these by removing the "sw_" prefix to help avoid confusion with these variables and their use. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-04-16 17:30:44 +00:00
int max_valid_res_idx = ice_get_max_valid_res_idx(pf->irq_tracker);
int msix_avail_per_vf, msix_avail_for_sriov;
struct device *dev = ice_pf_to_dev(pf);
u16 num_msix_per_vf, num_txq, num_rxq;
ice: Refactor interrupt tracking Currently we have two MSI-x (IRQ) trackers, one for OS requested MSI-x entries (sw_irq_tracker) and one for hardware MSI-x vectors (hw_irq_tracker). Generally the sw_irq_tracker has less entries than the hw_irq_tracker because the hw_irq_tracker has entries equal to the max allowed MSI-x per PF and the sw_irq_tracker is mainly the minimum (non SR-IOV portion of the vectors, kernel granted IRQs). All of the non SR-IOV portions of the driver (i.e. LAN queues, RDMA queues, OICR, etc.) take at least one of each type of tracker resource. SR-IOV only grabs entries from the hw_irq_tracker. There are a few issues with this approach that can be seen when doing any kind of device reconfiguration (i.e. ethtool -L, SR-IOV, etc.). One of them being, any time the driver creates an ice_q_vector and associates it to a LAN queue pair it will grab and use one entry from the hw_irq_tracker and one from the sw_irq_tracker. If the indices on these does not match it will cause a Tx timeout, which will cause a reset and then the indices will match up again and traffic will resume. The mismatched indices come from the trackers not being the same size and/or the search_hint in the two trackers not being equal. Another reason for the refactor is the co-existence of features with SR-IOV. If SR-IOV is enabled and the interrupts are taken from the end of the sw_irq_tracker then other features can no longer use this space because the hardware has now given the remaining interrupts to SR-IOV. This patch reworks how we track MSI-x vectors by removing the hw_irq_tracker completely and instead MSI-x resources needed for SR-IOV are determined all at once instead of per VF. This can be done because when creating VFs we know how many are wanted and how many MSI-x vectors each VF needs. This also allows us to start using MSI-x resources from the end of the PF's allowed MSI-x vectors so we are less likely to use entries needed for other features (i.e. 
RDMA, L2 Offload, etc). This patch also reworks the ice_res_tracker structure by removing the search_hint and adding a new member - "end". Instead of having a search_hint we will always search from 0. The new member, "end", will be used to manipulate the end of the ice_res_tracker (specifically sw_irq_tracker) during runtime based on MSI-x vectors needed by SR-IOV. In the normal case, the end of ice_res_tracker will be equal to the ice_res_tracker's num_entries. The sriov_base_vector member was added to the PF structure. It is used to represent the starting MSI-x index of all the needed MSI-x vectors for all SR-IOV VFs. Depending on how many MSI-x are needed, SR-IOV may have to take resources from the sw_irq_tracker. This is done by setting the sw_irq_tracker->end equal to the pf->sriov_base_vector. When all SR-IOV VFs are removed then the sw_irq_tracker->end is reset back to sw_irq_tracker->num_entries. The sriov_base_vector, along with the VF's number of MSI-x (pf->num_vf_msix), vf_id, and the base MSI-x index on the PF (pf->hw.func_caps.common_cap.msix_vector_first_id), is used to calculate the first HW absolute MSI-x index for each VF, which is used to write to the VPINT_ALLOC[_PCI] and GLINT_VECT2FUNC registers to program the VFs MSI-x PCI configuration bits. Also, the sriov_base_vector is used along with VF's num_vf_msix, vf_id, and q_vector->v_idx to determine the MSI-x register index (used for writing to GLINT_DYN_CTL) within the PF's space. Interrupt changes removed any references to hw_base_vector, hw_oicr_idx, and hw_irq_tracker. Only sw_base_vector, sw_oicr_idx, and sw_irq_tracker variables remain. Change all of these by removing the "sw_" prefix to help avoid confusion with these variables and their use. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-04-16 17:30:44 +00:00
if (!pf->num_alloc_vfs || max_valid_res_idx < 0)
return -EINVAL;
/* determine MSI-X resources per VF */
msix_avail_for_sriov = pf->hw.func_caps.common_cap.num_msix_vectors -
pf->irq_tracker->num_entries;
msix_avail_per_vf = msix_avail_for_sriov / pf->num_alloc_vfs;
if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_MED) {
num_msix_per_vf = ICE_NUM_VF_MSIX_MED;
} else if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_SMALL) {
num_msix_per_vf = ICE_NUM_VF_MSIX_SMALL;
} else if (msix_avail_per_vf >= ICE_MIN_INTR_PER_VF) {
num_msix_per_vf = ICE_MIN_INTR_PER_VF;
} else {
dev_err(dev, "Only %d MSI-X interrupts available for SR-IOV. Not enough to support minimum of %d MSI-X interrupts per VF for %d VFs\n",
msix_avail_for_sriov, ICE_MIN_INTR_PER_VF,
pf->num_alloc_vfs);
return -EIO;
}
/* determine queue resources per VF */
num_txq = ice_determine_res(pf, ice_get_avail_txq_count(pf),
min_t(u16,
num_msix_per_vf - ICE_NONQ_VECS_VF,
ICE_MAX_RSS_QS_PER_VF),
ICE_MIN_QS_PER_VF);
num_rxq = ice_determine_res(pf, ice_get_avail_rxq_count(pf),
min_t(u16,
num_msix_per_vf - ICE_NONQ_VECS_VF,
ICE_MAX_RSS_QS_PER_VF),
ICE_MIN_QS_PER_VF);
if (!num_txq || !num_rxq) {
dev_err(dev, "Not enough queues to support minimum of %d queue pairs per VF for %d VFs\n",
ICE_MIN_QS_PER_VF, pf->num_alloc_vfs);
return -EIO;
}
if (ice_sriov_set_msix_res(pf, num_msix_per_vf * pf->num_alloc_vfs)) {
dev_err(dev, "Unable to set MSI-X resources for %d VFs\n",
pf->num_alloc_vfs);
ice: Refactor interrupt tracking Currently we have two MSI-x (IRQ) trackers, one for OS requested MSI-x entries (sw_irq_tracker) and one for hardware MSI-x vectors (hw_irq_tracker). Generally the sw_irq_tracker has less entries than the hw_irq_tracker because the hw_irq_tracker has entries equal to the max allowed MSI-x per PF and the sw_irq_tracker is mainly the minimum (non SR-IOV portion of the vectors, kernel granted IRQs). All of the non SR-IOV portions of the driver (i.e. LAN queues, RDMA queues, OICR, etc.) take at least one of each type of tracker resource. SR-IOV only grabs entries from the hw_irq_tracker. There are a few issues with this approach that can be seen when doing any kind of device reconfiguration (i.e. ethtool -L, SR-IOV, etc.). One of them being, any time the driver creates an ice_q_vector and associates it to a LAN queue pair it will grab and use one entry from the hw_irq_tracker and one from the sw_irq_tracker. If the indices on these does not match it will cause a Tx timeout, which will cause a reset and then the indices will match up again and traffic will resume. The mismatched indices come from the trackers not being the same size and/or the search_hint in the two trackers not being equal. Another reason for the refactor is the co-existence of features with SR-IOV. If SR-IOV is enabled and the interrupts are taken from the end of the sw_irq_tracker then other features can no longer use this space because the hardware has now given the remaining interrupts to SR-IOV. This patch reworks how we track MSI-x vectors by removing the hw_irq_tracker completely and instead MSI-x resources needed for SR-IOV are determined all at once instead of per VF. This can be done because when creating VFs we know how many are wanted and how many MSI-x vectors each VF needs. This also allows us to start using MSI-x resources from the end of the PF's allowed MSI-x vectors so we are less likely to use entries needed for other features (i.e. 
RDMA, L2 Offload, etc). This patch also reworks the ice_res_tracker structure by removing the search_hint and adding a new member - "end". Instead of having a search_hint we will always search from 0. The new member, "end", will be used to manipulate the end of the ice_res_tracker (specifically sw_irq_tracker) during runtime based on MSI-x vectors needed by SR-IOV. In the normal case, the end of ice_res_tracker will be equal to the ice_res_tracker's num_entries. The sriov_base_vector member was added to the PF structure. It is used to represent the starting MSI-x index of all the needed MSI-x vectors for all SR-IOV VFs. Depending on how many MSI-x are needed, SR-IOV may have to take resources from the sw_irq_tracker. This is done by setting the sw_irq_tracker->end equal to the pf->sriov_base_vector. When all SR-IOV VFs are removed then the sw_irq_tracker->end is reset back to sw_irq_tracker->num_entries. The sriov_base_vector, along with the VF's number of MSI-x (pf->num_vf_msix), vf_id, and the base MSI-x index on the PF (pf->hw.func_caps.common_cap.msix_vector_first_id), is used to calculate the first HW absolute MSI-x index for each VF, which is used to write to the VPINT_ALLOC[_PCI] and GLINT_VECT2FUNC registers to program the VFs MSI-x PCI configuration bits. Also, the sriov_base_vector is used along with VF's num_vf_msix, vf_id, and q_vector->v_idx to determine the MSI-x register index (used for writing to GLINT_DYN_CTL) within the PF's space. Interrupt changes removed any references to hw_base_vector, hw_oicr_idx, and hw_irq_tracker. Only sw_base_vector, sw_oicr_idx, and sw_irq_tracker variables remain. Change all of these by removing the "sw_" prefix to help avoid confusion with these variables and their use. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-04-16 17:30:44 +00:00
return -EINVAL;
}
ice: Refactor interrupt tracking Currently we have two MSI-x (IRQ) trackers, one for OS requested MSI-x entries (sw_irq_tracker) and one for hardware MSI-x vectors (hw_irq_tracker). Generally the sw_irq_tracker has less entries than the hw_irq_tracker because the hw_irq_tracker has entries equal to the max allowed MSI-x per PF and the sw_irq_tracker is mainly the minimum (non SR-IOV portion of the vectors, kernel granted IRQs). All of the non SR-IOV portions of the driver (i.e. LAN queues, RDMA queues, OICR, etc.) take at least one of each type of tracker resource. SR-IOV only grabs entries from the hw_irq_tracker. There are a few issues with this approach that can be seen when doing any kind of device reconfiguration (i.e. ethtool -L, SR-IOV, etc.). One of them being, any time the driver creates an ice_q_vector and associates it to a LAN queue pair it will grab and use one entry from the hw_irq_tracker and one from the sw_irq_tracker. If the indices on these does not match it will cause a Tx timeout, which will cause a reset and then the indices will match up again and traffic will resume. The mismatched indices come from the trackers not being the same size and/or the search_hint in the two trackers not being equal. Another reason for the refactor is the co-existence of features with SR-IOV. If SR-IOV is enabled and the interrupts are taken from the end of the sw_irq_tracker then other features can no longer use this space because the hardware has now given the remaining interrupts to SR-IOV. This patch reworks how we track MSI-x vectors by removing the hw_irq_tracker completely and instead MSI-x resources needed for SR-IOV are determined all at once instead of per VF. This can be done because when creating VFs we know how many are wanted and how many MSI-x vectors each VF needs. This also allows us to start using MSI-x resources from the end of the PF's allowed MSI-x vectors so we are less likely to use entries needed for other features (i.e. 
RDMA, L2 Offload, etc). This patch also reworks the ice_res_tracker structure by removing the search_hint and adding a new member - "end". Instead of having a search_hint we will always search from 0. The new member, "end", will be used to manipulate the end of the ice_res_tracker (specifically sw_irq_tracker) during runtime based on MSI-x vectors needed by SR-IOV. In the normal case, the end of ice_res_tracker will be equal to the ice_res_tracker's num_entries. The sriov_base_vector member was added to the PF structure. It is used to represent the starting MSI-x index of all the needed MSI-x vectors for all SR-IOV VFs. Depending on how many MSI-x are needed, SR-IOV may have to take resources from the sw_irq_tracker. This is done by setting the sw_irq_tracker->end equal to the pf->sriov_base_vector. When all SR-IOV VFs are removed then the sw_irq_tracker->end is reset back to sw_irq_tracker->num_entries. The sriov_base_vector, along with the VF's number of MSI-x (pf->num_vf_msix), vf_id, and the base MSI-x index on the PF (pf->hw.func_caps.common_cap.msix_vector_first_id), is used to calculate the first HW absolute MSI-x index for each VF, which is used to write to the VPINT_ALLOC[_PCI] and GLINT_VECT2FUNC registers to program the VFs MSI-x PCI configuration bits. Also, the sriov_base_vector is used along with VF's num_vf_msix, vf_id, and q_vector->v_idx to determine the MSI-x register index (used for writing to GLINT_DYN_CTL) within the PF's space. Interrupt changes removed any references to hw_base_vector, hw_oicr_idx, and hw_irq_tracker. Only sw_base_vector, sw_oicr_idx, and sw_irq_tracker variables remain. Change all of these by removing the "sw_" prefix to help avoid confusion with these variables and their use. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-04-16 17:30:44 +00:00
/* only allow equal Tx/Rx queue count (i.e. queue pairs) */
pf->num_qps_per_vf = min_t(int, num_txq, num_rxq);
pf->num_msix_per_vf = num_msix_per_vf;
dev_info(dev, "Enabling %d VFs with %d vectors and %d queues per VF\n",
pf->num_alloc_vfs, pf->num_msix_per_vf, pf->num_qps_per_vf);
return 0;
}
/**
 * ice_clear_vf_reset_trigger - enable VF to access hardware
 * @vf: VF to enable hardware access for
 *
 * Reads the VF's VPGEN_VFRTRIG register, writes it back with the
 * VPGEN_VFRTRIG_VFSWR_M bit cleared and all other bits preserved, then
 * flushes the write.
 */
static void ice_clear_vf_reset_trigger(struct ice_vf *vf)
{
	struct ice_hw *hw = &vf->pf->hw;
	u32 vfrtrig = rd32(hw, VPGEN_VFRTRIG(vf->vf_id));

	/* drop only the software reset trigger bit */
	wr32(hw, VPGEN_VFRTRIG(vf->vf_id), vfrtrig & ~VPGEN_VFRTRIG_VFSWR_M);
	ice_flush(hw);
}
/**
 * ice_vf_set_vsi_promisc - set given VF VSI to given promiscuous mode(s)
 * @vf: pointer to the VF info
 * @vsi: the VSI being configured
 * @promisc_m: mask of promiscuous config bits
 * @rm_promisc: promisc flag request from the VF to remove or add filter
 *
 * This function configures VF VSI promiscuous mode, based on the VF requests,
 * for Unicast, Multicast and VLAN.
 *
 * Return: the status reported by the promiscuous helper that was invoked.
 */
static enum ice_status
ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m,
		       bool rm_promisc)
{
	struct ice_hw *hw = &vf->pf->hw;

	/* VSI has VLANs configured: the VLAN-aware helper takes the
	 * add/remove request directly through rm_promisc
	 */
	if (vsi->num_vlan)
		return ice_set_vlan_vsi_promisc(hw, vsi->idx, promisc_m,
						rm_promisc);

	/* Otherwise the rule is keyed on the port VLAN. When no port VLAN is
	 * set, vf->port_vlan_info is 0, which is the value used for the
	 * plain (no VLAN) case.
	 */
	if (rm_promisc)
		return ice_clear_vsi_promisc(hw, vsi->idx, promisc_m,
					     vf->port_vlan_info);

	return ice_set_vsi_promisc(hw, vsi->idx, promisc_m,
				   vf->port_vlan_info);
}
ice: Refactor VF reset Currently VF VSI are being reset twice during a PFR or greater. This is causing reset, specifically resetting all VFs, to take too long. This is causing various issues with VF drivers not being able to gracefully handle the VF reset timeout. Fix this by refactoring how VF reset is handled for the case mentioned previously and for the VFR/VFLR case. The refactor was done by doing the following: 1. Removing the call to ice_vsi_rebuild_by_type for ICE_VSI_VF VSI, which was causing the initial VSI rebuild. 2. Adding functions for pre/post VSI rebuild functions that can be called in both the reset all VFs case and reset individual VF case. 3. Adding VSI rebuild functions that are specific for the reset all VFs case and adding functions that are specific for the reset individual VF case. 4. Calling the pre-rebuild function, then the specific VSI rebuild function based on the reset type, and then calling the post-rebuild function to handle VF resets. This patch series makes some assumptions about how VSI are handling by FW during reset: 1. During a PFR or greater all VSI in FW will be cleared. 2. During a VFR/VFLR the VSI rebuild responsibility is in the hands of the PF software. 3. There is code in the ice_reset_all_vfs() case to amortize operations if possible. This was left intact. 4. PF software should not be replaying VSI based filters that were added other than host configured, PF software configured, or the VF's default/LAA MAC. This is the VF drivers job after it has been reset. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2020-05-16 00:51:15 +00:00
static void ice_vf_clear_counters(struct ice_vf *vf)
{
struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
vf->num_mac = 0;
vsi->num_vlan = 0;
memset(&vf->mdd_tx_events, 0, sizeof(vf->mdd_tx_events));
memset(&vf->mdd_rx_events, 0, sizeof(vf->mdd_rx_events));
}
/**
 * ice_vf_pre_vsi_rebuild - tasks to be done prior to VSI rebuild
 * @vf: VF to perform pre VSI rebuild tasks
 *
 * Clears the VF's software counters and then clears the VF's reset trigger.
 * These are per-VF steps that do not need to be amortized; in the
 * reset_all_vfs() case they are most likely invoked from a loop over every
 * VF.
 */
static void ice_vf_pre_vsi_rebuild(struct ice_vf *vf)
{
	/* software bookkeeping first ... */
	ice_vf_clear_counters(vf);

	/* ... then let the VF access hardware again */
	ice_clear_vf_reset_trigger(vf);
}
ice: Refactor VF reset Currently VF VSI are being reset twice during a PFR or greater. This is causing reset, specifically resetting all VFs, to take too long. This is causing various issues with VF drivers not being able to gracefully handle the VF reset timeout. Fix this by refactoring how VF reset is handled for the case mentioned previously and for the VFR/VFLR case. The refactor was done by doing the following: 1. Removing the call to ice_vsi_rebuild_by_type for ICE_VSI_VF VSI, which was causing the initial VSI rebuild. 2. Adding functions for pre/post VSI rebuild functions that can be called in both the reset all VFs case and reset individual VF case. 3. Adding VSI rebuild functions that are specific for the reset all VFs case and adding functions that are specific for the reset individual VF case. 4. Calling the pre-rebuild function, then the specific VSI rebuild function based on the reset type, and then calling the post-rebuild function to handle VF resets. This patch series makes some assumptions about how VSI are handling by FW during reset: 1. During a PFR or greater all VSI in FW will be cleared. 2. During a VFR/VFLR the VSI rebuild responsibility is in the hands of the PF software. 3. There is code in the ice_reset_all_vfs() case to amortize operations if possible. This was left intact. 4. PF software should not be replaying VSI based filters that were added other than host configured, PF software configured, or the VF's default/LAA MAC. This is the VF drivers job after it has been reset. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2020-05-16 00:51:15 +00:00
/**
 * ice_vf_rebuild_host_cfg - host admin configuration is persistent across reset
 * @vf: VF to rebuild host configuration on
 *
 * Re-applies the host trust setting, then the host MAC configuration and the
 * host VLAN configuration. A failure of the MAC or VLAN step is logged and
 * does not stop the remaining steps.
 */
static void ice_vf_rebuild_host_cfg(struct ice_vf *vf)
{
	struct ice_pf *pf = vf->pf;
	struct device *dev = ice_pf_to_dev(pf);

	ice_vf_set_host_trust_cfg(vf);

	if (ice_vf_rebuild_host_mac_cfg(vf) != 0) {
		dev_err(dev, "failed to rebuild default MAC configuration for VF %d\n",
			vf->vf_id);
	}

	if (ice_vf_rebuild_host_vlan_cfg(vf) != 0) {
		dev_err(dev, "failed to rebuild VLAN configuration for VF %u\n",
			vf->vf_id);
	}
}
/**
* ice_vf_rebuild_vsi_with_release - release and setup the VF's VSI
* @vf: VF to release and setup the VSI for
*
* This is only called when a single VF is being reset (i.e. VFR, VFLR, host VF
* configuration change, etc.).
*/
static int ice_vf_rebuild_vsi_with_release(struct ice_vf *vf)
{
ice_vf_vsi_release(vf);
if (!ice_vf_vsi_setup(vf))
ice: Refactor VF reset Currently VF VSI are being reset twice during a PFR or greater. This is causing reset, specifically resetting all VFs, to take too long. This is causing various issues with VF drivers not being able to gracefully handle the VF reset timeout. Fix this by refactoring how VF reset is handled for the case mentioned previously and for the VFR/VFLR case. The refactor was done by doing the following: 1. Removing the call to ice_vsi_rebuild_by_type for ICE_VSI_VF VSI, which was causing the initial VSI rebuild. 2. Adding functions for pre/post VSI rebuild functions that can be called in both the reset all VFs case and reset individual VF case. 3. Adding VSI rebuild functions that are specific for the reset all VFs case and adding functions that are specific for the reset individual VF case. 4. Calling the pre-rebuild function, then the specific VSI rebuild function based on the reset type, and then calling the post-rebuild function to handle VF resets. This patch series makes some assumptions about how VSI are handling by FW during reset: 1. During a PFR or greater all VSI in FW will be cleared. 2. During a VFR/VFLR the VSI rebuild responsibility is in the hands of the PF software. 3. There is code in the ice_reset_all_vfs() case to amortize operations if possible. This was left intact. 4. PF software should not be replaying VSI based filters that were added other than host configured, PF software configured, or the VF's default/LAA MAC. This is the VF drivers job after it has been reset. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2020-05-16 00:51:15 +00:00
return -ENOMEM;
ice: Refactor VF reset Currently VF VSI are being reset twice during a PFR or greater. This is causing reset, specifically resetting all VFs, to take too long. This is causing various issues with VF drivers not being able to gracefully handle the VF reset timeout. Fix this by refactoring how VF reset is handled for the case mentioned previously and for the VFR/VFLR case. The refactor was done by doing the following: 1. Removing the call to ice_vsi_rebuild_by_type for ICE_VSI_VF VSI, which was causing the initial VSI rebuild. 2. Adding functions for pre/post VSI rebuild functions that can be called in both the reset all VFs case and reset individual VF case. 3. Adding VSI rebuild functions that are specific for the reset all VFs case and adding functions that are specific for the reset individual VF case. 4. Calling the pre-rebuild function, then the specific VSI rebuild function based on the reset type, and then calling the post-rebuild function to handle VF resets. This patch series makes some assumptions about how VSI are handling by FW during reset: 1. During a PFR or greater all VSI in FW will be cleared. 2. During a VFR/VFLR the VSI rebuild responsibility is in the hands of the PF software. 3. There is code in the ice_reset_all_vfs() case to amortize operations if possible. This was left intact. 4. PF software should not be replaying VSI based filters that were added other than host configured, PF software configured, or the VF's default/LAA MAC. This is the VF drivers job after it has been reset. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2020-05-16 00:51:15 +00:00
return 0;
}
ice: Refactor VF reset Currently VF VSI are being reset twice during a PFR or greater. This is causing reset, specifically resetting all VFs, to take too long. This is causing various issues with VF drivers not being able to gracefully handle the VF reset timeout. Fix this by refactoring how VF reset is handled for the case mentioned previously and for the VFR/VFLR case. The refactor was done by doing the following: 1. Removing the call to ice_vsi_rebuild_by_type for ICE_VSI_VF VSI, which was causing the initial VSI rebuild. 2. Adding functions for pre/post VSI rebuild functions that can be called in both the reset all VFs case and reset individual VF case. 3. Adding VSI rebuild functions that are specific for the reset all VFs case and adding functions that are specific for the reset individual VF case. 4. Calling the pre-rebuild function, then the specific VSI rebuild function based on the reset type, and then calling the post-rebuild function to handle VF resets. This patch series makes some assumptions about how VSI are handling by FW during reset: 1. During a PFR or greater all VSI in FW will be cleared. 2. During a VFR/VFLR the VSI rebuild responsibility is in the hands of the PF software. 3. There is code in the ice_reset_all_vfs() case to amortize operations if possible. This was left intact. 4. PF software should not be replaying VSI based filters that were added other than host configured, PF software configured, or the VF's default/LAA MAC. This is the VF drivers job after it has been reset. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2020-05-16 00:51:15 +00:00
/**
* ice_vf_rebuild_vsi - rebuild the VF's VSI
* @vf: VF to rebuild the VSI for
*
* This is only called when all VF(s) are being reset (i.e. PCIe Reset on the
* host, PFR, CORER, etc.).
*/
static int ice_vf_rebuild_vsi(struct ice_vf *vf)
{
struct ice_pf *pf = vf->pf;
struct ice_vsi *vsi;
vsi = pf->vsi[vf->lan_vsi_idx];
if (ice_vsi_rebuild(vsi, true)) {
dev_err(ice_pf_to_dev(pf), "failed to rebuild VF %d VSI\n",
vf->vf_id);
return -EIO;
}
ice: Refactor VF reset Currently VF VSI are being reset twice during a PFR or greater. This is causing reset, specifically resetting all VFs, to take too long. This is causing various issues with VF drivers not being able to gracefully handle the VF reset timeout. Fix this by refactoring how VF reset is handled for the case mentioned previously and for the VFR/VFLR case. The refactor was done by doing the following: 1. Removing the call to ice_vsi_rebuild_by_type for ICE_VSI_VF VSI, which was causing the initial VSI rebuild. 2. Adding functions for pre/post VSI rebuild functions that can be called in both the reset all VFs case and reset individual VF case. 3. Adding VSI rebuild functions that are specific for the reset all VFs case and adding functions that are specific for the reset individual VF case. 4. Calling the pre-rebuild function, then the specific VSI rebuild function based on the reset type, and then calling the post-rebuild function to handle VF resets. This patch series makes some assumptions about how VSI are handling by FW during reset: 1. During a PFR or greater all VSI in FW will be cleared. 2. During a VFR/VFLR the VSI rebuild responsibility is in the hands of the PF software. 3. There is code in the ice_reset_all_vfs() case to amortize operations if possible. This was left intact. 4. PF software should not be replaying VSI based filters that were added other than host configured, PF software configured, or the VF's default/LAA MAC. This is the VF drivers job after it has been reset. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2020-05-16 00:51:15 +00:00
/* vsi->idx will remain the same in this case so don't update
* vf->lan_vsi_idx
*/
vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx);
vf->lan_vsi_num = vsi->vsi_num;
ice: Refactor VF reset Currently VF VSI are being reset twice during a PFR or greater. This is causing reset, specifically resetting all VFs, to take too long. This is causing various issues with VF drivers not being able to gracefully handle the VF reset timeout. Fix this by refactoring how VF reset is handled for the case mentioned previously and for the VFR/VFLR case. The refactor was done by doing the following: 1. Removing the call to ice_vsi_rebuild_by_type for ICE_VSI_VF VSI, which was causing the initial VSI rebuild. 2. Adding functions for pre/post VSI rebuild functions that can be called in both the reset all VFs case and reset individual VF case. 3. Adding VSI rebuild functions that are specific for the reset all VFs case and adding functions that are specific for the reset individual VF case. 4. Calling the pre-rebuild function, then the specific VSI rebuild function based on the reset type, and then calling the post-rebuild function to handle VF resets. This patch series makes some assumptions about how VSI are handling by FW during reset: 1. During a PFR or greater all VSI in FW will be cleared. 2. During a VFR/VFLR the VSI rebuild responsibility is in the hands of the PF software. 3. There is code in the ice_reset_all_vfs() case to amortize operations if possible. This was left intact. 4. PF software should not be replaying VSI based filters that were added other than host configured, PF software configured, or the VF's default/LAA MAC. This is the VF drivers job after it has been reset. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2020-05-16 00:51:15 +00:00
return 0;
}
ice: Refactor VF reset Currently VF VSI are being reset twice during a PFR or greater. This is causing reset, specifically resetting all VFs, to take too long. This is causing various issues with VF drivers not being able to gracefully handle the VF reset timeout. Fix this by refactoring how VF reset is handled for the case mentioned previously and for the VFR/VFLR case. The refactor was done by doing the following: 1. Removing the call to ice_vsi_rebuild_by_type for ICE_VSI_VF VSI, which was causing the initial VSI rebuild. 2. Adding functions for pre/post VSI rebuild functions that can be called in both the reset all VFs case and reset individual VF case. 3. Adding VSI rebuild functions that are specific for the reset all VFs case and adding functions that are specific for the reset individual VF case. 4. Calling the pre-rebuild function, then the specific VSI rebuild function based on the reset type, and then calling the post-rebuild function to handle VF resets. This patch series makes some assumptions about how VSI are handling by FW during reset: 1. During a PFR or greater all VSI in FW will be cleared. 2. During a VFR/VFLR the VSI rebuild responsibility is in the hands of the PF software. 3. There is code in the ice_reset_all_vfs() case to amortize operations if possible. This was left intact. 4. PF software should not be replaying VSI based filters that were added other than host configured, PF software configured, or the VF's default/LAA MAC. This is the VF drivers job after it has been reset. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2020-05-16 00:51:15 +00:00
/**
 * ice_vf_set_initialized - VF is ready for VIRTCHNL communication
 * @vf: VF to set in initialized state
 *
 * After this function the VF will be ready to receive/handle the
 * VIRTCHNL_OP_GET_VF_RESOURCES message
 *
 * Only software state in @vf is touched here: the promiscuous and disabled
 * state bits are cleared and the INIT bit is set last.
 */
static void ice_vf_set_initialized(struct ice_vf *vf)
{
	/* presumably marks the VF's queues as disabled - see helper */
	ice_set_vf_state_qs_dis(vf);
	/* drop promiscuous state flags left over from before the reset */
	clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states);
	clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states);
	/* VF is no longer disabled; flag it as initialized */
	clear_bit(ICE_VF_STATE_DIS, vf->vf_states);
	set_bit(ICE_VF_STATE_INIT, vf->vf_states);
}
/**
 * ice_vf_post_vsi_rebuild - finish a VF reset once its VSI has been rebuilt
 * @vf: VF to perform tasks on
 *
 * Reapplies the host configuration, moves the VF to the initialized state,
 * enables its mappings and finally writes VIRTCHNL_VFR_VFACTIVE to the VF's
 * VFGEN_RSTAT register.
 */
static void ice_vf_post_vsi_rebuild(struct ice_vf *vf)
{
	struct ice_hw *hw = &vf->pf->hw;

	ice_vf_rebuild_host_cfg(vf);
	ice_vf_set_initialized(vf);
	ice_ena_vf_mappings(vf);

	/* report the VF as active through its reset status register */
	wr32(hw, VFGEN_RSTAT(vf->vf_id), VIRTCHNL_VFR_VFACTIVE);
}
/**
 * ice_reset_all_vfs - reset all allocated VFs in one go
 * @pf: pointer to the PF structure
 * @is_vflr: true if VFLR was issued, false if not
 *
 * First, tell the hardware to reset each VF, then do all the waiting in one
 * chunk, and finally finish restoring each VF after the wait. This is useful
 * during PF routines which need to reset all VFs, as otherwise it must perform
 * these resets in a serialized fashion.
 *
 * The __ICE_VF_DIS bit in pf->state is held for the duration of the reset and
 * cleared again before returning.
 *
 * Returns true if any VFs were reset, and false otherwise.
 */
bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
{
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_hw *hw = &pf->hw;
	struct ice_vf *vf;
	int v, i;

	/* If we don't have any VFs, then there is nothing to reset */
	if (!pf->num_alloc_vfs)
		return false;

	/* If VFs have been disabled, there is no need to reset */
	if (test_and_set_bit(__ICE_VF_DIS, pf->state))
		return false;

	/* Begin reset on all VFs at once */
	ice_for_each_vf(pf, v)
		ice_trigger_vf_reset(&pf->vf[v], is_vflr, true);

	/* HW requires some time to make sure it can flush the FIFO for a VF
	 * when it resets it. Poll the VPGEN_VFRSTAT register for each VF in
	 * sequence to make sure that it has completed. We'll keep track of
	 * the VFs using a simple iterator that increments once that VF has
	 * finished resetting.
	 */
	for (i = 0, v = 0; i < 10 && v < pf->num_alloc_vfs; i++) {
		/* Check each VF in sequence */
		while (v < pf->num_alloc_vfs) {
			u32 reg;

			vf = &pf->vf[v];
			reg = rd32(hw, VPGEN_VFRSTAT(vf->vf_id));
			if (!(reg & VPGEN_VFRSTAT_VFRD_M)) {
				/* only delay if the check failed */
				usleep_range(10, 20);
				break;
			}

			/* If the current VF has finished resetting, move on
			 * to the next VF in sequence.
			 */
			v++;
		}
	}

	/* Display a warning if at least one VF didn't manage to reset in
	 * time, but continue on with the operation.
	 */
	if (v < pf->num_alloc_vfs)
		dev_warn(dev, "VF reset check timeout\n");

	/* run the pre-rebuild, VSI rebuild and post-rebuild steps per VF */
	ice_for_each_vf(pf, v) {
		vf = &pf->vf[v];

		ice_vf_pre_vsi_rebuild(vf);
		/* NOTE(review): the rebuild result is not checked, so the
		 * post-rebuild step runs even if the VSI rebuild failed -
		 * confirm this is intended.
		 */
		ice_vf_rebuild_vsi(vf);
		ice_vf_post_vsi_rebuild(vf);
	}

	ice_flush(hw);
	clear_bit(__ICE_VF_DIS, pf->state);

	return true;
}
/**
* ice_is_vf_disabled
* @vf: pointer to the VF info
*
* Returns true if the PF or VF is disabled, false otherwise.
*/
static bool ice_is_vf_disabled(struct ice_vf *vf)
{
struct ice_pf *pf = vf->pf;
/* If the PF has been disabled, there is no need resetting VF until
* PF is active again. Similarly, if the VF has been disabled, this
* means something else is resetting the VF, so we shouldn't continue.
* Otherwise, set disable VF state bit for actual reset, and continue.
*/
return (test_bit(__ICE_VF_DIS, pf->state) ||
test_bit(ICE_VF_STATE_DIS, vf->vf_states));
}
/**
 * ice_reset_vf - Reset a particular VF
 * @vf: pointer to the VF structure
 * @is_vflr: true if VFLR was issued, false if not
 *
 * Returns true if the VF is currently in reset, resets successfully, or resets
 * are disabled and false otherwise (i.e. the VF's VSI could not be released
 * and set up again).
 */
bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
{
	struct ice_pf *pf = vf->pf;
	struct ice_vsi *vsi;
	struct device *dev;
	struct ice_hw *hw;
	bool rsd = false;
	u8 promisc_m;
	u32 reg;
	int i;

	dev = ice_pf_to_dev(pf);

	if (test_bit(__ICE_VF_RESETS_DISABLED, pf->state)) {
		dev_dbg(dev, "Trying to reset VF %d, but all VF resets are disabled\n",
			vf->vf_id);
		return true;
	}

	if (ice_is_vf_disabled(vf)) {
		dev_dbg(dev, "VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n",
			vf->vf_id);
		return true;
	}

	/* Set VF disable bit state here, before triggering reset */
	set_bit(ICE_VF_STATE_DIS, vf->vf_states);
	ice_trigger_vf_reset(vf, is_vflr, false);

	vsi = pf->vsi[vf->lan_vsi_idx];

	if (test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states))
		ice_dis_vf_qs(vf);

	/* Call Disable LAN Tx queue AQ whether or not queues are
	 * enabled. This is needed for successful completion of VFR.
	 */
	ice_dis_vsi_txq(vsi->port_info, vsi->idx, 0, 0, NULL, NULL,
			NULL, ICE_VF_RESET, vf->vf_id, NULL);

	hw = &pf->hw;
	/* poll VPGEN_VFRSTAT reg to make sure
	 * that reset is complete
	 */
	for (i = 0; i < 10; i++) {
		/* VF reset requires driver to first reset the VF and then
		 * poll the status register to make sure that the reset
		 * completed successfully.
		 */
		reg = rd32(hw, VPGEN_VFRSTAT(vf->vf_id));
		if (reg & VPGEN_VFRSTAT_VFRD_M) {
			rsd = true;
			break;
		}

		/* only sleep if the reset is not done */
		usleep_range(10, 20);
	}

	/* Display a warning if VF didn't manage to reset in time, but need to
	 * continue on with the operation.
	 */
	if (!rsd)
		dev_warn(dev, "VF reset check timeout on VF %d\n", vf->vf_id);

	/* disable promiscuous modes in case they were enabled
	 * ignore any error if disabling process failed
	 */
	if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) ||
	    test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) {
		if (vf->port_vlan_info || vsi->num_vlan)
			promisc_m = ICE_UCAST_VLAN_PROMISC_BITS;
		else
			promisc_m = ICE_UCAST_PROMISC_BITS;

		/* vsi still refers to pf->vsi[vf->lan_vsi_idx] from above */
		if (ice_vf_set_vsi_promisc(vf, vsi, promisc_m, true))
			dev_err(dev, "disabling promiscuous mode failed\n");
	}

	ice_vf_pre_vsi_rebuild(vf);

	/* Without a VSI the VF must not be marked initialized or reported
	 * active, so skip the post-rebuild step and report the failure.
	 */
	if (ice_vf_rebuild_vsi_with_release(vf)) {
		dev_err(dev, "Failed to release and setup the VF%u's VSI\n",
			vf->vf_id);
		return false;
	}

	ice_vf_post_vsi_rebuild(vf);

	return true;
}
/**
* ice_vc_notify_link_state - Inform all VFs on a PF of link status
* @pf: pointer to the PF structure
*/
void ice_vc_notify_link_state(struct ice_pf *pf)
{
int i;
ice_for_each_vf(pf, i)
ice_vc_notify_vf_link_state(&pf->vf[i]);
}
/**
* ice_vc_notify_reset - Send pending reset message to all VFs
* @pf: pointer to the PF structure
*
* indicate a pending reset to all VFs on a given PF
*/
void ice_vc_notify_reset(struct ice_pf *pf)
{
struct virtchnl_pf_event pfe;
if (!pf->num_alloc_vfs)
return;
pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING;
pfe.severity = PF_EVENT_SEVERITY_CERTAIN_DOOM;
ice_vc_vf_broadcast(pf, VIRTCHNL_OP_EVENT, VIRTCHNL_STATUS_SUCCESS,
(u8 *)&pfe, sizeof(struct virtchnl_pf_event));
}
/**
* ice_vc_notify_vf_reset - Notify VF of a reset event
* @vf: pointer to the VF structure
*/
static void ice_vc_notify_vf_reset(struct ice_vf *vf)
{
struct virtchnl_pf_event pfe;
struct ice_pf *pf;
if (!vf)
return;
pf = vf->pf;
if (ice_validate_vf_id(pf, vf->vf_id))
return;
/* Bail out if VF is in disabled state, neither initialized, nor active
* state - otherwise proceed with notifications
*/
if ((!test_bit(ICE_VF_STATE_INIT, vf->vf_states) &&
!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) ||
test_bit(ICE_VF_STATE_DIS, vf->vf_states))
return;
pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING;
pfe.severity = PF_EVENT_SEVERITY_CERTAIN_DOOM;
ice_aq_send_msg_to_vf(&pf->hw, vf->vf_id, VIRTCHNL_OP_EVENT,
VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe, sizeof(pfe),
NULL);
}
/**
* ice_init_vf_vsi_res - initialize/setup VF VSI resources
* @vf: VF to initialize/setup the VSI for
*
* This function creates a VSI for the VF, adds a VLAN 0 filter, and sets up the
* VF VSI's broadcast filter and is only used during initial VF creation.
*/
static int ice_init_vf_vsi_res(struct ice_vf *vf)
{
struct ice_pf *pf = vf->pf;
u8 broadcast[ETH_ALEN];
enum ice_status status;
struct ice_vsi *vsi;
struct device *dev;
int err;
vf->first_vector_idx = ice_calc_vf_first_vector_idx(pf, vf);
dev = ice_pf_to_dev(pf);
vsi = ice_vf_vsi_setup(vf);
if (!vsi)
return -ENOMEM;
err = ice_vsi_add_vlan(vsi, 0, ICE_FWD_TO_VSI);
if (err) {
dev_warn(dev, "Failed to add VLAN 0 filter for VF %d\n",
vf->vf_id);
goto release_vsi;
}
eth_broadcast_addr(broadcast);
status = ice_fltr_add_mac(vsi, broadcast, ICE_FWD_TO_VSI);
if (status) {
dev_err(dev, "Failed to add broadcast MAC filter for VF %d, status %s\n",
vf->vf_id, ice_stat_str(status));
err = ice_status_to_errno(status);
goto release_vsi;
}
vf->num_mac = 1;
return 0;
release_vsi:
ice_vf_vsi_release(vf);
return err;
}
/**
 * ice_start_vfs - start VFs so they are ready to be used by SR-IOV
 * @pf: PF the VFs are associated with
 *
 * For every allocated VF: clear its reset trigger, set up its VSI resources,
 * mark it initialized, enable its mappings and report it active through
 * VFGEN_RSTAT. If VSI setup fails for one VF, the VFs started before it have
 * their mappings disabled and their VSIs released.
 *
 * Returns 0 on success or the error from ice_init_vf_vsi_res().
 */
static int ice_start_vfs(struct ice_pf *pf)
{
	struct ice_hw *hw = &pf->hw;
	struct ice_vf *vf;
	int retval, i;

	ice_for_each_vf(pf, i) {
		vf = &pf->vf[i];

		ice_clear_vf_reset_trigger(vf);

		retval = ice_init_vf_vsi_res(vf);
		if (retval) {
			dev_err(ice_pf_to_dev(pf), "Failed to initialize VSI resources for VF %d, error %d\n",
				vf->vf_id, retval);
			goto teardown;
		}

		set_bit(ICE_VF_STATE_INIT, vf->vf_states);
		ice_ena_vf_mappings(vf);
		wr32(hw, VFGEN_RSTAT(vf->vf_id), VIRTCHNL_VFR_VFACTIVE);
	}

	ice_flush(hw);
	return 0;

teardown:
	/* unwind only the VFs that were fully started, newest first */
	while (--i >= 0) {
		vf = &pf->vf[i];

		ice_dis_vf_mappings(vf);
		ice_vf_vsi_release(vf);
	}

	return retval;
}
/**
 * ice_set_dflt_settings_vfs - set VF defaults during initialization/creation
 * @pf: PF holding reference to all VFs for default configuration
 *
 * Fills in the back-pointer, ID, switch ID, default capability, spoof-check
 * setting and queue count of every allocated VF.
 */
static void ice_set_dflt_settings_vfs(struct ice_pf *pf)
{
	int vf_idx;

	ice_for_each_vf(pf, vf_idx) {
		struct ice_vf *cur = &pf->vf[vf_idx];

		cur->pf = pf;
		cur->vf_id = vf_idx;
		cur->vf_sw_id = pf->first_sw;

		/* assign default capabilities */
		set_bit(ICE_VIRTCHNL_VF_CAP_L2, &cur->vf_caps);

		cur->spoofchk = true;
		cur->num_vf_qs = pf->num_qps_per_vf;
	}
}
/**
 * ice_alloc_vfs - allocate num_vfs in the PF structure
 * @pf: PF to store the allocated VFs in
 * @num_vfs: number of VFs to allocate
 *
 * Allocates the VF array with devm_kcalloc() against the PF's device and
 * records both the array and its length in @pf.
 *
 * Returns 0 on success or -ENOMEM if the allocation fails, in which case @pf
 * is left untouched.
 */
static int ice_alloc_vfs(struct ice_pf *pf, int num_vfs)
{
	struct ice_vf *vf_array = devm_kcalloc(ice_pf_to_dev(pf), num_vfs,
					       sizeof(*vf_array), GFP_KERNEL);

	if (!vf_array)
		return -ENOMEM;

	pf->vf = vf_array;
	pf->num_alloc_vfs = num_vfs;

	return 0;
}
/**
 * ice_ena_vfs - enable VFs so they are ready to be used
 * @pf: pointer to the PF structure
 * @num_vfs: number of VFs to enable
 *
 * Enables SR-IOV on the PCI device, allocates the VF array, sizes the per-VF
 * resources, applies default settings and starts every VF. The OICR interrupt
 * is masked for the duration and is only re-armed here on the failure paths.
 *
 * Returns 0 on success. On failure returns the error from pci_enable_sriov()
 * or ice_alloc_vfs(), -ENOSPC if per-VF resources cannot be set up, or -EAGAIN
 * if the VFs fail to start; everything done up to the failing step is undone.
 */
static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs)
{
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_hw *hw = &pf->hw;
	int ret;

	/* Disable global interrupt 0 so we don't try to handle the VFLR. */
	wr32(hw, GLINT_DYN_CTL(pf->oicr_idx),
	     ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S);
	set_bit(__ICE_OICR_INTR_DIS, pf->state);
	ice_flush(hw);

	ret = pci_enable_sriov(pf->pdev, num_vfs);
	if (ret) {
		pf->num_alloc_vfs = 0;
		goto err_unroll_intr;
	}

	ret = ice_alloc_vfs(pf, num_vfs);
	if (ret)
		goto err_pci_disable_sriov;

	if (ice_set_per_vf_res(pf)) {
		dev_err(dev, "Not enough resources for %d VFs, try with fewer number of VFs\n",
			num_vfs);
		ret = -ENOSPC;
		goto err_unroll_sriov;
	}

	ice_set_dflt_settings_vfs(pf);

	if (ice_start_vfs(pf)) {
		dev_err(dev, "Failed to start VF(s)\n");
		ret = -EAGAIN;
		goto err_unroll_sriov;
	}

	clear_bit(__ICE_VF_DIS, pf->state);

	return 0;

err_unroll_sriov:
	/* drop the VF array so nothing iterates over stale entries */
	devm_kfree(dev, pf->vf);
	pf->vf = NULL;
	pf->num_alloc_vfs = 0;
err_pci_disable_sriov:
	pci_disable_sriov(pf->pdev);
err_unroll_intr:
	/* rearm interrupts here */
	ice_irq_dynamic_ena(hw, NULL, NULL);
	clear_bit(__ICE_OICR_INTR_DIS, pf->state);

	return ret;
}
/**
* ice_pf_state_is_nominal - checks the PF for nominal state
* @pf: pointer to PF to check
*
* Check the PF's state for a collection of bits that would indicate
* the PF is in a state that would inhibit normal operation for
* driver functionality.
*
* Returns true if PF is in a nominal state.
* Returns false otherwise
*/
static bool ice_pf_state_is_nominal(struct ice_pf *pf)
{
DECLARE_BITMAP(check_bits, __ICE_STATE_NBITS) = { 0 };
if (!pf)
return false;
bitmap_set(check_bits, 0, __ICE_STATE_NOMINAL_CHECK_BITS);
if (bitmap_intersects(pf->state, check_bits, __ICE_STATE_NBITS))
return false;
return true;
}
/**
 * ice_pci_sriov_ena - Enable or change number of VFs
 * @pf: pointer to the PF structure
 * @num_vfs: number of VFs to allocate
 *
 * If the requested number of VFs is already enabled nothing is done. If a
 * different number is enabled, the existing VFs are freed first.
 *
 * Returns 0 on success, -EOPNOTSUPP if @num_vfs exceeds what the PF supports,
 * or the error from ice_ena_vfs().
 */
static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs)
{
	int cur_vfs = pci_num_vf(pf->pdev);
	struct device *dev = ice_pf_to_dev(pf);
	int err;

	if (cur_vfs) {
		/* already in the requested configuration */
		if (cur_vfs == num_vfs)
			return 0;

		/* count is changing: tear down before re-creating */
		ice_free_vfs(pf);
	}

	if (num_vfs > pf->num_vfs_supported) {
		dev_err(dev, "Can't enable %d VFs, max VFs supported is %d\n",
			num_vfs, pf->num_vfs_supported);
		return -EOPNOTSUPP;
	}

	dev_info(dev, "Enabling %d VFs\n", num_vfs);

	err = ice_ena_vfs(pf, num_vfs);
	if (err) {
		dev_err(dev, "Failed to enable SR-IOV: %d\n", err);
		return err;
	}

	set_bit(ICE_FLAG_SRIOV_ENA, pf->flags);

	return 0;
}
/**
 * ice_check_sriov_allowed - check if SR-IOV is allowed based on various checks
 * @pf: PF to enable SR-IOV on
 *
 * Returns 0 if SR-IOV may be configured, -EOPNOTSUPP if the device is not
 * SR-IOV capable or is in Safe Mode, and -EBUSY if the PF is not in a nominal
 * state. The first failing check is logged and decides the result.
 */
static int ice_check_sriov_allowed(struct ice_pf *pf)
{
	struct device *dev = ice_pf_to_dev(pf);
	int err = 0;

	if (!test_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags)) {
		dev_err(dev, "This device is not capable of SR-IOV\n");
		err = -EOPNOTSUPP;
	} else if (ice_is_safe_mode(pf)) {
		dev_err(dev, "SR-IOV cannot be configured - Device is in Safe Mode\n");
		err = -EOPNOTSUPP;
	} else if (!ice_pf_state_is_nominal(pf)) {
		dev_err(dev, "Cannot enable SR-IOV, device not ready\n");
		err = -EBUSY;
	}

	return err;
}
/**
 * ice_sriov_configure - Enable or change number of VFs via sysfs
 * @pdev: pointer to a pci_dev structure
 * @num_vfs: number of VFs to allocate or 0 to free VFs
 *
 * This function is called when the user updates the number of VFs in sysfs.
 *
 * Returns @num_vfs when VFs were enabled, 0 when VFs were freed, and a
 * negative error code on failure (-EBUSY when asked to free VFs that are
 * still assigned to VMs).
 */
int ice_sriov_configure(struct pci_dev *pdev, int num_vfs)
{
	struct ice_pf *pf = pci_get_drvdata(pdev);
	struct device *dev = ice_pf_to_dev(pf);
	int err;

	err = ice_check_sriov_allowed(pf);
	if (err)
		return err;

	/* non-zero request: enable (or resize to) that many VFs */
	if (num_vfs) {
		err = ice_pci_sriov_ena(pf, num_vfs);
		return err ? err : num_vfs;
	}

	/* zero request: free everything, unless a VM still holds a VF */
	if (pci_vfs_assigned(pdev)) {
		dev_err(dev, "can't free VFs because some are assigned to VMs.\n");
		return -EBUSY;
	}

	ice_free_vfs(pf);

	return 0;
}
/**
 * ice_process_vflr_event - reset every VF that has a pending VFLR
 * @pf: pointer to the PF structure
 *
 * Consumes the __ICE_VFLR_EVENT_PENDING state bit; if it was set and VFs are
 * allocated, reads GLGEN_VFLRSTAT for each VF and resets the VFs whose bit is
 * set.
 */
void ice_process_vflr_event(struct ice_pf *pf)
{
	struct ice_hw *hw = &pf->hw;
	unsigned int vf_id;

	/* the pending bit is cleared even when there are no VFs to service */
	if (!test_and_clear_bit(__ICE_VFLR_EVENT_PENDING, pf->state))
		return;

	if (!pf->num_alloc_vfs)
		return;

	ice_for_each_vf(pf, vf_id) {
		/* position of this VF in the GLGEN_VFLRSTAT register array */
		u32 abs_vf_id = hw->func_caps.vf_base_id + vf_id;
		u32 flr_stat;

		/* read GLGEN_VFLRSTAT register to find out the flr VFs */
		flr_stat = rd32(hw, GLGEN_VFLRSTAT(abs_vf_id / 32));
		if (!(flr_stat & BIT(abs_vf_id % 32)))
			continue;

		/* GLGEN_VFLRSTAT bit will be cleared in ice_reset_vf */
		ice_reset_vf(&pf->vf[vf_id], true);
	}
}
/**
 * ice_vc_reset_vf - notify a VF of an impending reset, then reset it
 * @vf: pointer to the VF info
 *
 * The notification is sent first so the VF driver hears about the reset before
 * it happens; the reset itself is requested with the second argument of
 * ice_reset_vf() set to false.
 */
static void ice_vc_reset_vf(struct ice_vf *vf)
{
	/* tell the VF driver first ... */
	ice_vc_notify_vf_reset(vf);

	/* ... then perform the reset */
	ice_reset_vf(vf, false);
}
/**
 * ice_get_vf_from_pfq - get the VF who owns the PF space queue passed in
 * @pf: PF used to index all VFs
 * @pfq: queue index relative to the PF's function space
 *
 * Walks every allocated VF and searches the Rx queue map of its LAN VSI for
 * @pfq. VFs that currently have no VSI are skipped.
 *
 * If no VF is found who owns the pfq then return NULL, otherwise return a
 * pointer to the VF who owns the pfq
 */
static struct ice_vf *ice_get_vf_from_pfq(struct ice_pf *pf, u16 pfq)
{
	unsigned int vf_id;

	ice_for_each_vf(pf, vf_id) {
		struct ice_vf *vf = &pf->vf[vf_id];
		struct ice_vsi *vsi;
		u16 rxq_idx;

		/* Other users of pf->vsi[vf->lan_vsi_idx] in this file treat an
		 * empty slot as possible, so do not dereference it blindly.
		 */
		vsi = pf->vsi[vf->lan_vsi_idx];
		if (!vsi)
			continue;

		ice_for_each_rxq(vsi, rxq_idx)
			if (vsi->rxq_map[rxq_idx] == pfq)
				return vf;
	}

	return NULL;
}
/**
 * ice_globalq_to_pfq - convert from global queue index to PF space queue index
 * @pf: PF used for conversion
 * @globalq: global queue index used to convert to PF space queue index
 *
 * Returns @globalq rebased against the PF's first Rx queue ID
 * (func_caps.common_cap.rxq_first_id).
 */
static u32 ice_globalq_to_pfq(struct ice_pf *pf, u32 globalq)
{
	u32 first_rxq = pf->hw.func_caps.common_cap.rxq_first_id;

	return globalq - first_rxq;
}
/**
 * ice_vf_lan_overflow_event - handle LAN overflow event for a VF
 * @pf: PF that the LAN overflow event happened on
 * @event: structure holding the event information for the LAN overflow event
 *
 * Extracts the Rx queue number from the event, maps it to the owning VF and,
 * if a VF owns that queue, notifies and resets that VF. Events for queues not
 * owned by any VF are ignored.
 */
void
ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event)
{
	struct ice_vf *offender;
	u32 raw, globalq;

	raw = le32_to_cpu(event->desc.params.lan_overflow.prtdcb_ruptq);
	dev_dbg(ice_pf_to_dev(pf), "GLDCB_RTCTQ: 0x%08x\n", raw);

	/* event returns device global Rx queue number */
	globalq = (raw & GLDCB_RTCTQ_RXQNUM_M) >> GLDCB_RTCTQ_RXQNUM_S;

	offender = ice_get_vf_from_pfq(pf, ice_globalq_to_pfq(pf, globalq));
	if (offender)
		ice_vc_reset_vf(offender);
}
/**
 * ice_vc_send_msg_to_vf - Send message to VF
 * @vf: pointer to the VF info
 * @v_opcode: virtual channel opcode
 * @v_retval: virtual channel return value
 * @msg: pointer to the msg buffer
 * @msglen: msg length
 *
 * Also keeps the per-VF valid/invalid message counters: a non-zero @v_retval
 * counts as an invalid message, and once more than
 * ICE_DFLT_NUM_INVAL_MSGS_ALLOWED have accumulated the VF is marked disabled
 * and nothing is sent.
 *
 * Returns 0 on success, -EINVAL for a NULL @vf or an invalid VF ID, and -EIO
 * when the VF has been disabled here or the mailbox send fails (a send failure
 * with last status ICE_AQ_RC_ENOSYS is not treated as an error).
 */
static int
ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode,
		      enum virtchnl_status_code v_retval, u8 *msg, u16 msglen)
{
	enum ice_status status;
	struct device *dev;
	struct ice_pf *pf;

	if (!vf)
		return -EINVAL;

	pf = vf->pf;
	if (ice_validate_vf_id(pf, vf->vf_id))
		return -EINVAL;

	dev = ice_pf_to_dev(pf);

	/* single place to detect unsuccessful return values */
	if (!v_retval) {
		vf->num_valid_msgs++;
		/* reset the invalid counter, if a valid message is received. */
		vf->num_inval_msgs = 0;
	} else {
		vf->num_inval_msgs++;
		dev_info(dev, "VF %d failed opcode %d, retval: %d\n", vf->vf_id,
			 v_opcode, v_retval);

		if (vf->num_inval_msgs > ICE_DFLT_NUM_INVAL_MSGS_ALLOWED) {
			dev_err(dev, "Number of invalid messages exceeded for VF %d\n",
				vf->vf_id);
			dev_err(dev, "Use PF Control I/F to enable the VF\n");
			set_bit(ICE_VF_STATE_DIS, vf->vf_states);
			return -EIO;
		}
	}

	status = ice_aq_send_msg_to_vf(&pf->hw, vf->vf_id, v_opcode, v_retval,
				       msg, msglen, NULL);
	if (!status || pf->hw.mailboxq.sq_last_status == ICE_AQ_RC_ENOSYS)
		return 0;

	dev_info(dev, "Unable to send the message to VF %d ret %s aq_err %s\n",
		 vf->vf_id, ice_stat_str(status),
		 ice_aq_str(pf->hw.mailboxq.sq_last_status));

	return -EIO;
}
/**
 * ice_vc_get_ver_msg - answer a VF's API version request
 * @vf: pointer to the VF info
 * @msg: pointer to the msg buffer, read as a struct virtchnl_version_info
 *
 * Records the version the VF reported and replies with the PF's version. A VF
 * that reports the 1.0 API gets VIRTCHNL_VERSION_MINOR_NO_VF_CAPS as the minor
 * version in the reply.
 *
 * Returns the result of ice_vc_send_msg_to_vf().
 */
static int ice_vc_get_ver_msg(struct ice_vf *vf, u8 *msg)
{
	const struct virtchnl_version_info *vf_reported =
		(struct virtchnl_version_info *)msg;
	struct virtchnl_version_info reply = {
		VIRTCHNL_VERSION_MAJOR, VIRTCHNL_VERSION_MINOR
	};

	vf->vf_ver = *vf_reported;

	/* VFs running the 1.0 API expect to get 1.0 back or they will cry. */
	if (VF_IS_V10(&vf->vf_ver))
		reply.minor = VIRTCHNL_VERSION_MINOR_NO_VF_CAPS;

	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_VERSION,
				     VIRTCHNL_STATUS_SUCCESS, (u8 *)&reply,
				     sizeof(reply));
}
/**
 * ice_vc_get_vf_res_msg - answer a VF's request for its resources
 * @vf: pointer to the VF info
 * @msg: pointer to the msg buffer; for VFs speaking the 1.1 API it is read as
 *	 a u32 bitmap of capabilities requested by the VF driver
 *
 * Builds a struct virtchnl_vf_resource describing the capabilities granted,
 * the queue/vector counts, the RSS sizes and the single LAN VSI of the VF,
 * sends it to the VF and, on success, marks the VF active.
 *
 * Returns the result of ice_vc_send_msg_to_vf(); validation and allocation
 * failures are reported to the VF through the virtchnl status in the reply.
 */
static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg)
{
	/* capabilities that are granted whenever the VF driver requests them */
	static const u32 echoed_caps[] = {
		VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2,
		VIRTCHNL_VF_OFFLOAD_ENCAP,
		VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM,
		VIRTCHNL_VF_OFFLOAD_RX_POLLING,
		VIRTCHNL_VF_OFFLOAD_WB_ON_ITR,
		VIRTCHNL_VF_OFFLOAD_REQ_QUEUES,
		VIRTCHNL_VF_CAP_ADV_LINK_SPEED,
	};
	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
	struct virtchnl_vf_resource *vfres = NULL;
	struct ice_pf *pf = vf->pf;
	struct ice_vsi *vsi;
	unsigned int c;
	int len = 0;
	int ret;

	if (ice_check_vf_init(pf, vf)) {
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
		goto send_reply;
	}

	vfres = kzalloc(sizeof(*vfres), GFP_KERNEL);
	if (!vfres) {
		v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
		goto send_reply;
	}
	/* from here on the reply carries the (possibly partial) structure */
	len = sizeof(*vfres);

	/* pre-1.1 VFs cannot state their capabilities; assume a fixed set */
	if (VF_IS_V11(&vf->vf_ver))
		vf->driver_caps = *(u32 *)msg;
	else
		vf->driver_caps = VIRTCHNL_VF_OFFLOAD_L2 |
				  VIRTCHNL_VF_OFFLOAD_RSS_REG |
				  VIRTCHNL_VF_OFFLOAD_VLAN;

	vfres->vf_cap_flags = VIRTCHNL_VF_OFFLOAD_L2;

	vsi = pf->vsi[vf->lan_vsi_idx];
	if (!vsi) {
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
		goto send_reply;
	}

	/* VLAN offload is only offered while no port VLAN is set on the VSI */
	if (!vsi->info.pvid)
		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_VLAN;

	/* exactly one RSS flavor is granted: PF, else AQ, else REG */
	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF)
		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PF;
	else if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_AQ)
		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_AQ;
	else
		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_REG;

	for (c = 0; c < sizeof(echoed_caps) / sizeof(echoed_caps[0]); c++)
		if (vf->driver_caps & echoed_caps[c])
			vfres->vf_cap_flags |= echoed_caps[c];

	vfres->num_vsis = 1;
	/* Tx and Rx queue are equal for VF */
	vfres->num_queue_pairs = vsi->num_txq;
	vfres->max_vectors = pf->num_msix_per_vf;
	vfres->rss_key_size = ICE_VSIQF_HKEY_ARRAY_SIZE;
	vfres->rss_lut_size = ICE_VSIQF_HLUT_ARRAY_SIZE;

	vfres->vsi_res[0].vsi_id = vf->lan_vsi_num;
	vfres->vsi_res[0].vsi_type = VIRTCHNL_VSI_SRIOV;
	vfres->vsi_res[0].num_queue_pairs = vsi->num_txq;
	ether_addr_copy(vfres->vsi_res[0].default_mac_addr,
			vf->dflt_lan_addr.addr);

	/* match guest capabilities */
	vf->driver_caps = vfres->vf_cap_flags;

	set_bit(ICE_VF_STATE_ACTIVE, vf->vf_states);

send_reply:
	/* send the response back to the VF */
	ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_VF_RESOURCES, v_ret,
				    (u8 *)vfres, len);

	kfree(vfres);

	return ret;
}
/**
 * ice_vc_reset_vf_msg - handle a VF's request to reset itself
 * @vf: pointer to the VF info
 *
 * Unlike other virtchnl messages, the PF driver doesn't send a response back
 * to the VF. The request is ignored unless the VF has ICE_VF_STATE_INIT set.
 */
static void ice_vc_reset_vf_msg(struct ice_vf *vf)
{
	if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states))
		return;

	ice_reset_vf(vf, false);
}
/**
 * ice_find_vsi_from_id - look up a VSI by its vsi_num
 * @pf: the PF structure to search for the VSI
 * @id: ID of the VSI it is searching for
 *
 * Returns the first VSI in the PF's VSI array whose vsi_num equals @id, or
 * NULL if there is none. Empty array slots are skipped.
 */
static struct ice_vsi *ice_find_vsi_from_id(struct ice_pf *pf, u16 id)
{
	int idx;

	ice_for_each_vsi(pf, idx) {
		struct ice_vsi *candidate = pf->vsi[idx];

		if (candidate && candidate->vsi_num == id)
			return candidate;
	}

	return NULL;
}
/**
 * ice_vc_isvalid_vsi_id - check that a VSI ID exists and belongs to a VF
 * @vf: pointer to the VF info
 * @vsi_id: VF relative VSI ID
 *
 * Returns true only if a VSI with @vsi_id exists on the VF's PF and its vf_id
 * matches that of @vf.
 */
static bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id)
{
	struct ice_vsi *vsi = ice_find_vsi_from_id(vf->pf, vsi_id);

	if (!vsi)
		return false;

	return vsi->vf_id == vf->vf_id;
}
/**
 * ice_vc_isvalid_q_id - check that a queue ID is within a VSI's allocation
 * @vf: pointer to the VF info
 * @vsi_id: VSI ID
 * @qid: VSI relative queue ID
 *
 * Returns true only if a VSI with @vsi_id exists and @qid is below its number
 * of allocated Tx queues.
 *
 * NOTE(review): @qid is a u8, so a wider queue ID passed by a caller would be
 * truncated before this check - confirm against the callers.
 */
static bool ice_vc_isvalid_q_id(struct ice_vf *vf, u16 vsi_id, u8 qid)
{
	struct ice_vsi *vsi = ice_find_vsi_from_id(vf->pf, vsi_id);

	if (!vsi)
		return false;

	/* allocated Tx and Rx queues should be always equal for VF VSI */
	return qid < vsi->alloc_txq;
}
/**
 * ice_vc_isvalid_ring_len - check a requested descriptor ring length
 * @ring_len: length of ring
 *
 * A length of zero is accepted. Any other length must lie within
 * [ICE_MIN_NUM_DESC, ICE_MAX_NUM_DESC] and be a multiple of
 * ICE_REQ_DESC_MULTIPLE.
 */
static bool ice_vc_isvalid_ring_len(u16 ring_len)
{
	if (!ring_len)
		return true;

	if (ring_len < ICE_MIN_NUM_DESC || ring_len > ICE_MAX_NUM_DESC)
		return false;

	return (ring_len % ICE_REQ_DESC_MULTIPLE) == 0;
}
/**
 * ice_vc_config_rss_key - configure the VF's RSS key
 * @vf: pointer to the VF info
 * @msg: pointer to the msg buffer, read as a struct virtchnl_rss_key
 *
 * The request is rejected with VIRTCHNL_STATUS_ERR_PARAM unless the VF is
 * active, the VSI ID belongs to the VF, the key length equals
 * ICE_VSIQF_HKEY_ARRAY_SIZE, RSS is enabled on the PF and the VF has a VSI.
 * A failure to program the key is reported as
 * VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR.
 *
 * Returns the result of sending the reply to the VF.
 */
static int ice_vc_config_rss_key(struct ice_vf *vf, u8 *msg)
{
	struct virtchnl_rss_key *req = (struct virtchnl_rss_key *)msg;
	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
	struct ice_pf *pf = vf->pf;
	struct ice_vsi *vsi;

	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) ||
	    !ice_vc_isvalid_vsi_id(vf, req->vsi_id) ||
	    req->key_len != ICE_VSIQF_HKEY_ARRAY_SIZE ||
	    !test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) {
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
		goto reply;
	}

	vsi = pf->vsi[vf->lan_vsi_idx];
	if (!vsi)
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
	else if (ice_set_rss(vsi, req->key, NULL, 0))
		v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;

reply:
	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_KEY, v_ret,
				     NULL, 0);
}
/**
 * ice_vc_config_rss_lut - configure the VF's RSS LUT
 * @vf: pointer to the VF info
 * @msg: pointer to the msg buffer, read as a struct virtchnl_rss_lut
 *
 * The request is rejected with VIRTCHNL_STATUS_ERR_PARAM unless the VF is
 * active, the VSI ID belongs to the VF, the number of LUT entries equals
 * ICE_VSIQF_HLUT_ARRAY_SIZE, RSS is enabled on the PF and the VF has a VSI.
 * A failure to program the LUT is reported as
 * VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR.
 *
 * Returns the result of sending the reply to the VF.
 */
static int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg)
{
	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
	struct virtchnl_rss_lut *req = (struct virtchnl_rss_lut *)msg;
	struct ice_pf *pf = vf->pf;
	struct ice_vsi *vsi;

	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) ||
	    !ice_vc_isvalid_vsi_id(vf, req->vsi_id) ||
	    req->lut_entries != ICE_VSIQF_HLUT_ARRAY_SIZE ||
	    !test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) {
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
		goto reply;
	}

	vsi = pf->vsi[vf->lan_vsi_idx];
	if (!vsi)
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
	else if (ice_set_rss(vsi, NULL, req->lut, ICE_VSIQF_HLUT_ARRAY_SIZE))
		v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;

reply:
	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_LUT, v_ret,
				     NULL, 0);
}
ice: Wait for VF to be reset/ready before configuration The configuration/command below is failing when the VF in the xml file is already bound to the host iavf driver. pci_0000_af_0_0.xml: <interface type='hostdev' managed='yes'> <source> <address type='pci' domain='0x0000' bus='0xaf' slot='0x0' function='0x0'/> </source> <mac address='00:de:ad:00:11:01'/> </interface> > virsh attach-device domain_name pci_0000_af_0_0.xml error: Failed to attach device from pci_0000_af_0_0.xml error: Cannot set interface MAC/vlanid to 00:de:ad:00:11:01/0 for ifname ens1f1 vf 0: Device or resource busy This is failing because the VF has not been completely removed/reset after being unbound (via the virsh command above) from the host iavf driver and ice_set_vf_mac() checks if the VF is disabled before waiting for the reset to finish. Fix this by waiting for the VF remove/reset process to happen before checking if the VF is disabled. Also, since many functions for VF administration on the PF were more or less calling the same 3 functions (ice_wait_on_vf_reset(), ice_is_vf_disabled(), and ice_check_vf_init()) move these into the helper function ice_check_vf_ready_for_cfg(). Then call this function in any flow that attempts to configure/query a VF from the PF. Lastly, increase the maximum wait time in ice_wait_on_vf_reset() to 800ms, and modify/add the #define(s) that determine the wait time. This was done for robustness because in rare/stress cases VF removal can take a max of ~800ms and previously the wait was a max of ~300ms. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2020-02-18 21:22:06 +00:00
/**
 * ice_wait_on_vf_reset - poll to make sure a given VF is ready after reset
 * @vf: the VF being reset
 *
 * Polls ICE_VF_STATE_INIT up to ICE_MAX_VF_RESET_TRIES times, sleeping
 * ICE_MAX_VF_RESET_SLEEP_MS between checks. The max poll time is about ~800ms,
 * which is about the maximum time it takes for a VF to be reset and/or a VF
 * driver to be removed. Returns silently on timeout; callers must re-check
 * the VF state themselves.
 */
static void ice_wait_on_vf_reset(struct ice_vf *vf)
{
	int tries_left = ICE_MAX_VF_RESET_TRIES;

	while (tries_left-- > 0) {
		if (test_bit(ICE_VF_STATE_INIT, vf->vf_states))
			return;

		msleep(ICE_MAX_VF_RESET_SLEEP_MS);
	}
}
/**
 * ice_check_vf_ready_for_cfg - check if VF is ready to be configured/queried
 * @vf: VF to check if it's ready to be configured/queried
 *
 * Waits for any in-progress reset to finish, then makes sure the VF is neither
 * disabled nor uninitialized, so it can be configured and/or queried by a host
 * administrator.
 *
 * Returns 0 if the VF is ready, -EINVAL if it is disabled, -EBUSY if it is
 * still not initialized after the wait.
 */
static int ice_check_vf_ready_for_cfg(struct ice_vf *vf)
{
	/* the wait must come first: a VF mid-reset may still look disabled */
	ice_wait_on_vf_reset(vf);

	if (ice_is_vf_disabled(vf))
		return -EINVAL;

	if (ice_check_vf_init(vf->pf, vf))
		return -EBUSY;

	return 0;
}
ice: Fix VF spoofchk There are many things wrong with the function ice_set_vf_spoofchk(). 1. The VSI being modified is the PF VSI, not the VF VSI. 2. We are enabling Rx VLAN pruning instead of Tx VLAN anti-spoof. 3. The spoofchk setting for each VF is not initialized correctly or re-initialized correctly on reset. To fix [1] we need to make sure we are modifying the VF VSI. This is done by using the vf->lan_vsi_idx to index into the PF's VSI array. To fix [2] replace setting Rx VLAN pruning in ice_set_vf_spoofchk() with setting Tx VLAN anti-spoof. To Fix [3] we need to make sure the initial VSI settings match what is done in ice_set_vf_spoofchk() for spoofchk=on. Also make sure this also works for VF reset. This was done by modifying ice_vsi_init() to account for the current spoofchk state of the VF VSI. Because of these changes, Tx VLAN anti-spoof needs to be removed from ice_cfg_vlan_pruning(). This is okay for the VF because this is now controlled from the admin enabling/disabling spoofchk. For the PF, Tx VLAN anti-spoof should not be set. This change requires us to call ice_set_vf_spoofchk() when configuring promiscuous mode for the VF which requires ice_set_vf_spoofchk() to move in order to prevent a forward declaration prototype. Also, add VLAN 0 by default when allocating a VF since the PF is unaware if the guest OS is running the 8021q module. Without this, MDD events will trigger on untagged traffic because spoofcheck is enabled by default. Due to this change, ignore add/delete messages for VLAN 0 from VIRTCHNL since this is added/deleted during VF initialization/teardown respectively and should not be modified. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-12-12 11:12:54 +00:00
/**
* ice_set_vf_spoofchk
* @netdev: network interface device structure
* @vf_id: VF identifier
* @ena: flag to enable or disable feature
*
* Enable or disable VF spoof checking
*/
int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_pf *pf = np->vsi->back;
	struct ice_vsi_ctx *ctx;
	struct ice_vsi *vf_vsi;
	enum ice_status status;
	struct device *dev;
	struct ice_vf *vf;
	int ret;

	dev = ice_pf_to_dev(pf);
	if (ice_validate_vf_id(pf, vf_id))
		return -EINVAL;

	vf = &pf->vf[vf_id];

	/* ret is 0 past this point and is what the success path returns */
	ret = ice_check_vf_ready_for_cfg(vf);
	if (ret)
		return ret;

	/* the setting is applied to the VF's own VSI, not to the PF VSI */
	vf_vsi = pf->vsi[vf->lan_vsi_idx];
	if (!vf_vsi) {
		netdev_err(netdev, "VSI %d for VF %d is null\n",
			   vf->lan_vsi_idx, vf->vf_id);
		return -EINVAL;
	}

	if (vf_vsi->type != ICE_VSI_VF) {
		netdev_err(netdev, "Type %d of VSI %d for VF %d is no ICE_VSI_VF\n",
			   vf_vsi->type, vf_vsi->vsi_num, vf->vf_id);
		return -ENODEV;
	}

	/* nothing to do when the requested state is already recorded */
	if (ena == vf->spoofchk) {
		dev_dbg(dev, "VF spoofchk already %s\n", ena ? "ON" : "OFF");
		return 0;
	}

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	/* start from the cached security flags and only mark the security
	 * section as valid for this update
	 */
	ctx->info.sec_flags = vf_vsi->info.sec_flags;
	ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID);
	if (ena) {
		ctx->info.sec_flags |=
			ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF |
			(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
			 ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
	} else {
		ctx->info.sec_flags &=
			~(ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF |
			  (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
			   ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S));
	}

	status = ice_update_vsi(&pf->hw, vf_vsi->idx, ctx, NULL);
	if (status) {
		dev_err(dev, "Failed to %sable spoofchk on VF %d VSI %d, error %s\n",
			ena ? "en" : "dis", vf->vf_id, vf_vsi->vsi_num,
			ice_stat_str(status));
		ret = -EIO;
		goto out;
	}

	/* only update spoofchk state and VSI context on success */
	vf_vsi->info.sec_flags = ctx->info.sec_flags;
	vf->spoofchk = ena;

out:
	kfree(ctx);
	return ret;
}
/**
 * ice_is_any_vf_in_promisc - report whether some VF has promiscuous mode set
 * @pf: PF structure whose VF array is scanned
 *
 * Walks every VF of @pf and looks at its unicast and multicast promiscuous
 * state bits.
 *
 * Return: true as soon as a VF with either bit set is found, false when no VF
 * has one set.
 */
bool ice_is_any_vf_in_promisc(struct ice_pf *pf)
{
	int i;

	ice_for_each_vf(pf, i) {
		struct ice_vf *cur = &pf->vf[i];

		if (test_bit(ICE_VF_STATE_UC_PROMISC, cur->vf_states))
			return true;
		if (test_bit(ICE_VF_STATE_MC_PROMISC, cur->vf_states))
			return true;
	}

	return false;
}
/**
 * ice_vc_cfg_promiscuous_mode_msg - handle a promiscuous mode request
 * @vf: pointer to the VF info
 * @msg: pointer to the msg buffer, read as a struct virtchnl_promisc_info
 *
 * Called from the VF to configure promiscuous mode on the VF's VSI. The
 * request is rejected with VIRTCHNL_STATUS_ERR_PARAM when the VF is not
 * active, the VSI ID is not valid for the VF, or the VF has no VSI. A VF
 * without the privilege capability gets a reply carrying the untouched
 * (success) status although nothing is changed.
 *
 * A VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE reply is always sent to the VF.
 *
 * Return: the value returned by ice_vc_send_msg_to_vf().
 */
static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg)
{
enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
struct virtchnl_promisc_info *info =
(struct virtchnl_promisc_info *)msg;
struct ice_pf *pf = vf->pf;
struct ice_vsi *vsi;
struct device *dev;
bool rm_promisc;
int ret = 0;
if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
if (!ice_vc_isvalid_vsi_id(vf, info->vsi_id)) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
vsi = pf->vsi[vf->lan_vsi_idx];
if (!vsi) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
dev = ice_pf_to_dev(pf);
if (!test_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) {
dev_err(dev, "Unprivileged VF %d is attempting to configure promiscuous mode\n",
vf->vf_id);
/* Leave v_ret alone, lie to the VF on purpose. */
goto error_param;
}
/* the request removes promiscuous mode only when neither the unicast
 * nor the multicast flag is set
 */
rm_promisc = !(info->flags & FLAG_VF_UNICAST_PROMISC) &&
!(info->flags & FLAG_VF_MULTICAST_PROMISC);
/* with VLANs or a port VLAN in use, spoofchk is turned off while
 * promiscuous mode is on (and back on when it is removed), and VLAN
 * pruning is reconfigured to match
 */
if (vsi->num_vlan || vf->port_vlan_info) {
struct ice_vsi *pf_vsi = ice_get_main_vsi(pf);
struct net_device *pf_netdev;
if (!pf_vsi) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
pf_netdev = pf_vsi->netdev;
ret = ice_set_vf_spoofchk(pf_netdev, vf->vf_id, rm_promisc);
/* NOTE(review): a spoofchk failure records an error status but does
 * not jump to error_param, so the steps below still run and ret is
 * overwritten by the pruning call - confirm this is intended.
 */
if (ret) {
dev_err(dev, "Failed to update spoofchk to %s for VF %d VSI %d when setting promiscuous mode\n",
rm_promisc ? "ON" : "OFF", vf->vf_id,
vsi->vsi_num);
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
}
ret = ice_cfg_vlan_pruning(vsi, true, !rm_promisc);
if (ret) {
dev_err(dev, "Failed to configure VLAN pruning in promiscuous mode\n");
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
}
if (!test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, pf->flags)) {
/* true promiscuous mode is not enabled on the PF: unicast
 * promiscuous is handled through the switch's default VSI instead
 */
bool set_dflt_vsi = !!(info->flags & FLAG_VF_UNICAST_PROMISC);
if (set_dflt_vsi && !ice_is_dflt_vsi_in_use(pf->first_sw))
/* only attempt to set the default forwarding VSI if
 * it's not currently set
 */
ret = ice_set_dflt_vsi(pf->first_sw, vsi);
else if (!set_dflt_vsi &&
ice_is_vsi_dflt_vsi(pf->first_sw, vsi))
/* only attempt to free the default forwarding VSI if we
 * are the owner
 */
ret = ice_clear_dflt_vsi(pf->first_sw);
if (ret) {
dev_err(dev, "%sable VF %d as the default VSI failed, error %d\n",
set_dflt_vsi ? "en" : "dis", vf->vf_id, ret);
v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
goto error_param;
}
} else {
enum ice_status status;
u8 promisc_m;
/* Pick one promiscuous mask. The unicast flag takes precedence: the
 * multicast masks are used only when the unicast flag is clear, and
 * the unicast masks are also used when neither flag is set (removal).
 * The VLAN variants are used when a port VLAN or VLANs are present.
 * NOTE(review): a request with both flags set therefore programs
 * only the unicast mask - confirm this is intended.
 */
if (info->flags & FLAG_VF_UNICAST_PROMISC) {
if (vf->port_vlan_info || vsi->num_vlan)
promisc_m = ICE_UCAST_VLAN_PROMISC_BITS;
else
promisc_m = ICE_UCAST_PROMISC_BITS;
} else if (info->flags & FLAG_VF_MULTICAST_PROMISC) {
if (vf->port_vlan_info || vsi->num_vlan)
promisc_m = ICE_MCAST_VLAN_PROMISC_BITS;
else
promisc_m = ICE_MCAST_PROMISC_BITS;
} else {
if (vf->port_vlan_info || vsi->num_vlan)
promisc_m = ICE_UCAST_VLAN_PROMISC_BITS;
else
promisc_m = ICE_UCAST_PROMISC_BITS;
}
/* Configure multicast/unicast with or without VLAN promiscuous
 * mode
 */
status = ice_vf_set_vsi_promisc(vf, vsi, promisc_m, rm_promisc);
if (status) {
dev_err(dev, "%sable Tx/Rx filter promiscuous mode on VF-%d failed, error: %s\n",
rm_promisc ? "dis" : "en", vf->vf_id,
ice_stat_str(status));
v_ret = ice_err_to_virt_err(status);
goto error_param;
} else {
dev_dbg(dev, "%sable Tx/Rx filter promiscuous mode on VF-%d succeeded\n",
rm_promisc ? "dis" : "en", vf->vf_id);
}
}
/* mirror the requested flags into the VF state bits; this point is
 * reached only when none of the earlier steps jumped to error_param
 */
if (info->flags & FLAG_VF_MULTICAST_PROMISC)
set_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states);
else
clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states);
if (info->flags & FLAG_VF_UNICAST_PROMISC)
set_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states);
else
clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states);
error_param:
/* send the response to the VF, on success and on failure alike */
return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
v_ret, NULL, 0);
}
/**
 * ice_vc_get_stats_msg - answer a VF request for its VSI statistics
 * @vf: pointer to the VF info
 * @msg: pointer to the msg buffer, read as a struct virtchnl_queue_select
 *
 * Called from the VF to get VSI stats. When the VF is not active, the VSI ID
 * in the request is not valid for the VF, or the VF has no VSI, the reply
 * carries VIRTCHNL_STATUS_ERR_PARAM and an all-zero statistics structure.
 *
 * Return: the value returned by ice_vc_send_msg_to_vf().
 */
static int ice_vc_get_stats_msg(struct ice_vf *vf, u8 *msg)
{
	struct virtchnl_queue_select *vqs =
		(struct virtchnl_queue_select *)msg;
	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
	struct ice_eth_stats stats = { 0 };
	struct ice_vsi *vsi = NULL;

	/* the VSI is only looked up for an active VF with a valid VSI ID */
	if (test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) &&
	    ice_vc_isvalid_vsi_id(vf, vqs->vsi_id))
		vsi = vf->pf->vsi[vf->lan_vsi_idx];

	if (vsi) {
		/* refresh the counters, then snapshot them for the reply */
		ice_update_eth_stats(vsi);
		stats = vsi->eth_stats;
	} else {
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
	}

	/* send the response to the VF */
	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_STATS, v_ret,
				     (u8 *)&stats, sizeof(stats));
}
/**
 * ice_vc_validate_vqs_bitmaps - sanity check the queue bitmaps of a request
 * @vqs: virtchnl_queue_select structure containing bitmaps to validate
 *
 * A request is rejected when it selects no queue at all, or when either
 * bitmap has a bit set at or above position ICE_MAX_RSS_QS_PER_VF.
 *
 * Return: true when both bitmaps are acceptable, false otherwise.
 */
static bool ice_vc_validate_vqs_bitmaps(struct virtchnl_queue_select *vqs)
{
	/* at least one Rx or Tx queue has to be selected */
	if (!vqs->rx_queues && !vqs->tx_queues)
		return false;

	if (vqs->rx_queues >= BIT(ICE_MAX_RSS_QS_PER_VF))
		return false;

	return vqs->tx_queues < BIT(ICE_MAX_RSS_QS_PER_VF);
}
/**
 * ice_vf_ena_txq_interrupt - enable Tx queue interrupt via QINT_TQCTL
 * @vsi: VSI of the VF to configure
 * @q_idx: VF queue index used to determine the queue in the PF's space
 *
 * Sets the cause-enable bit of the queue's QINT_TQCTL register, unless the
 * MSI-X index field of that register is zero.
 */
static void ice_vf_ena_txq_interrupt(struct ice_vsi *vsi, u32 q_idx)
{
	u32 pfq = vsi->txq_map[q_idx];
	struct ice_hw *hw = &vsi->back->hw;
	u32 ctl = rd32(hw, QINT_TQCTL(pfq));

	/* MSI-X index 0 in the VF's space is always for the OICR, which means
	 * this is most likely a poll mode VF driver, so don't enable an
	 * interrupt that was never configured via VIRTCHNL_OP_CONFIG_IRQ_MAP
	 */
	if (ctl & QINT_TQCTL_MSIX_INDX_M)
		wr32(hw, QINT_TQCTL(pfq), ctl | QINT_TQCTL_CAUSE_ENA_M);
}
/**
 * ice_vf_ena_rxq_interrupt - enable Rx queue interrupt via QINT_RQCTL
 * @vsi: VSI of the VF to configure
 * @q_idx: VF queue index used to determine the queue in the PF's space
 *
 * Sets the cause-enable bit of the queue's QINT_RQCTL register, unless the
 * MSI-X index field of that register is zero.
 */
static void ice_vf_ena_rxq_interrupt(struct ice_vsi *vsi, u32 q_idx)
{
	u32 pfq = vsi->rxq_map[q_idx];
	struct ice_hw *hw = &vsi->back->hw;
	u32 ctl = rd32(hw, QINT_RQCTL(pfq));

	/* MSI-X index 0 in the VF's space is always for the OICR, which means
	 * this is most likely a poll mode VF driver, so don't enable an
	 * interrupt that was never configured via VIRTCHNL_OP_CONFIG_IRQ_MAP
	 */
	if (ctl & QINT_RQCTL_MSIX_INDX_M)
		wr32(hw, QINT_RQCTL(pfq), ctl | QINT_RQCTL_CAUSE_ENA_M);
}
/**
 * ice_vc_ena_qs_msg - handle a VF request to enable queues
 * @vf: pointer to the VF info
 * @msg: pointer to the msg buffer, read as a struct virtchnl_queue_select
 *
 * Called from the VF to enable all or specific queue(s). Rx rings selected in
 * the request are started and their interrupt cause is enabled; for Tx queues
 * only the interrupt cause is enabled. Queues already marked enabled are
 * skipped. Processing stops at the first invalid queue ID or Rx ring start
 * failure; queues enabled before that point stay enabled.
 *
 * A VIRTCHNL_OP_ENABLE_QUEUES reply is always sent to the VF.
 *
 * Return: the value returned by ice_vc_send_msg_to_vf().
 */
static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg)
{
	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
	struct virtchnl_queue_select *vqs =
	    (struct virtchnl_queue_select *)msg;
	struct ice_pf *pf = vf->pf;
	struct ice_vsi *vsi;
	unsigned long q_map;
	u16 vf_q_id;

	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
		goto error_param;
	}

	if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) {
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
		goto error_param;
	}

	if (!ice_vc_validate_vqs_bitmaps(vqs)) {
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
		goto error_param;
	}

	/* this validated pointer is used for both the Rx and the Tx loop */
	vsi = pf->vsi[vf->lan_vsi_idx];
	if (!vsi) {
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
		goto error_param;
	}

	/* Enable only Rx rings, Tx rings were enabled by the FW when the
	 * Tx queue group list was configured and the context bits were
	 * programmed using ice_vsi_cfg_txqs
	 */
	q_map = vqs->rx_queues;
	for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
		if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
			goto error_param;
		}

		/* Skip queue if enabled */
		if (test_bit(vf_q_id, vf->rxq_ena))
			continue;

		if (ice_vsi_ctrl_one_rx_ring(vsi, true, vf_q_id, true)) {
			dev_err(ice_pf_to_dev(vsi->back), "Failed to enable Rx ring %d on VSI %d\n",
				vf_q_id, vsi->vsi_num);
			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
			goto error_param;
		}

		ice_vf_ena_rxq_interrupt(vsi, vf_q_id);
		set_bit(vf_q_id, vf->rxq_ena);
	}

	q_map = vqs->tx_queues;
	for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
		if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
			goto error_param;
		}

		/* Skip queue if enabled */
		if (test_bit(vf_q_id, vf->txq_ena))
			continue;

		ice_vf_ena_txq_interrupt(vsi, vf_q_id);
		set_bit(vf_q_id, vf->txq_ena);
	}

	/* Set flag to indicate that queues are enabled */
	if (v_ret == VIRTCHNL_STATUS_SUCCESS)
		set_bit(ICE_VF_STATE_QS_ENA, vf->vf_states);

error_param:
	/* send the response to the VF */
	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES, v_ret,
				     NULL, 0);
}
/**
 * ice_vc_dis_qs_msg - handle a VF request to disable queues
 * @vf: pointer to the VF info
 * @msg: pointer to the msg buffer, read as a struct virtchnl_queue_select
 *
 * Called from the VF to disable all or specific queue(s). Selected Tx rings
 * that are marked enabled are stopped one by one. Selected Rx rings are
 * stopped in one batch when the request matches the set of enabled Rx queues
 * exactly, and one by one otherwise. Processing stops at the first invalid
 * queue ID or ring stop failure; queues disabled before that point stay
 * disabled.
 *
 * A VIRTCHNL_OP_DISABLE_QUEUES reply is always sent to the VF.
 *
 * Return: the value returned by ice_vc_send_msg_to_vf().
 */
static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg)
{
	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
	struct virtchnl_queue_select *vqs =
	    (struct virtchnl_queue_select *)msg;
	struct ice_pf *pf = vf->pf;
	struct ice_vsi *vsi;
	unsigned long q_map;
	u16 vf_q_id;

	/* NOTE(review): the request is rejected only when BOTH state bits are
	 * clear - confirm that && (rather than ||) is intended here.
	 */
	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) &&
	    !test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states)) {
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
		goto error_param;
	}

	if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) {
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
		goto error_param;
	}

	if (!ice_vc_validate_vqs_bitmaps(vqs)) {
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
		goto error_param;
	}

	vsi = pf->vsi[vf->lan_vsi_idx];
	if (!vsi) {
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
		goto error_param;
	}

	if (vqs->tx_queues) {
		q_map = vqs->tx_queues;

		for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
			struct ice_txq_meta txq_meta = { 0 };
			struct ice_ring *ring;

			if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
				goto error_param;
			}

			/* Skip queue if not enabled */
			if (!test_bit(vf_q_id, vf->txq_ena))
				continue;

			/* index tx_rings[] only with a queue ID that passed
			 * ice_vc_isvalid_q_id() above
			 */
			ring = vsi->tx_rings[vf_q_id];

			ice_fill_txq_meta(vsi, ring, &txq_meta);

			if (ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id,
						 ring, &txq_meta)) {
				dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Tx ring %d on VSI %d\n",
					vf_q_id, vsi->vsi_num);
				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
				goto error_param;
			}

			/* Clear enabled queues flag */
			clear_bit(vf_q_id, vf->txq_ena);
		}
	}

	q_map = vqs->rx_queues;
	/* speed up Rx queue disable by batching them if possible */
	if (q_map &&
	    bitmap_equal(&q_map, vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF)) {
		if (ice_vsi_stop_all_rx_rings(vsi)) {
			dev_err(ice_pf_to_dev(vsi->back), "Failed to stop all Rx rings on VSI %d\n",
				vsi->vsi_num);
			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
			goto error_param;
		}

		bitmap_zero(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF);
	} else if (q_map) {
		for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
			if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
				goto error_param;
			}

			/* Skip queue if not enabled */
			if (!test_bit(vf_q_id, vf->rxq_ena))
				continue;

			if (ice_vsi_ctrl_one_rx_ring(vsi, false, vf_q_id,
						     true)) {
				dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Rx ring %d on VSI %d\n",
					vf_q_id, vsi->vsi_num);
				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
				goto error_param;
			}

			/* Clear enabled queues flag */
			clear_bit(vf_q_id, vf->rxq_ena);
		}
	}

	/* Clear enabled queues flag */
	if (v_ret == VIRTCHNL_STATUS_SUCCESS && ice_vf_has_no_qs_ena(vf))
		clear_bit(ICE_VF_STATE_QS_ENA, vf->vf_states);

error_param:
	/* send the response to the VF */
	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_QUEUES, v_ret,
				     NULL, 0);
}
/**
 * ice_cfg_interrupt - map the queues of one vector map entry to a vector
 * @vf: pointer to the VF info
 * @vsi: the VSI being configured
 * @vector_id: vector ID
 * @map: vector map for mapping vectors to queues
 * @q_vector: structure for interrupt vector
 *
 * Resets the ring counters of @q_vector, then attaches every Rx queue set in
 * @map->rxq_map and every Tx queue set in @map->txq_map to @q_vector and
 * programs the matching queue interrupt.
 *
 * Return: VIRTCHNL_STATUS_SUCCESS, or VIRTCHNL_STATUS_ERR_PARAM as soon as a
 * queue index fails ice_vc_isvalid_q_id(); queues handled before that point
 * stay configured.
 */
static int
ice_cfg_interrupt(struct ice_vf *vf, struct ice_vsi *vsi, u16 vector_id,
		  struct virtchnl_vector_map *map,
		  struct ice_q_vector *q_vector)
{
	unsigned long rx_bits, tx_bits;
	u16 qid;

	q_vector->num_ring_rx = 0;
	q_vector->num_ring_tx = 0;

	rx_bits = map->rxq_map;
	for_each_set_bit(qid, &rx_bits, ICE_MAX_RSS_QS_PER_VF) {
		if (!ice_vc_isvalid_q_id(vf, vsi->vsi_num, qid))
			return VIRTCHNL_STATUS_ERR_PARAM;

		q_vector->rx.itr_idx = map->rxitr_idx;
		q_vector->num_ring_rx++;
		vsi->rx_rings[qid]->q_vector = q_vector;
		ice_cfg_rxq_interrupt(vsi, qid, vector_id,
				      q_vector->rx.itr_idx);
	}

	tx_bits = map->txq_map;
	for_each_set_bit(qid, &tx_bits, ICE_MAX_RSS_QS_PER_VF) {
		if (!ice_vc_isvalid_q_id(vf, vsi->vsi_num, qid))
			return VIRTCHNL_STATUS_ERR_PARAM;

		q_vector->tx.itr_idx = map->txitr_idx;
		q_vector->num_ring_tx++;
		vsi->tx_rings[qid]->q_vector = q_vector;
		ice_cfg_txq_interrupt(vsi, qid, vector_id,
				      q_vector->tx.itr_idx);
	}

	return VIRTCHNL_STATUS_SUCCESS;
}
/**
 * ice_vc_cfg_irq_map_msg - handle a VF request to map queues to vectors
 * @vf: pointer to the VF info
 * @msg: pointer to the msg buffer, read as a struct virtchnl_irq_map_info
 *
 * Called from the VF to configure the IRQ to queue map. Every vector map
 * entry of the request is validated and, unless it names vector 0, applied
 * through ice_cfg_interrupt(). Processing stops at the first entry that is
 * rejected.
 *
 * A VIRTCHNL_OP_CONFIG_IRQ_MAP reply is always sent to the VF.
 *
 * Return: the value returned by ice_vc_send_msg_to_vf().
 */
static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg)
{
	struct virtchnl_irq_map_info *irqmap_info =
		(struct virtchnl_irq_map_info *)msg;
	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
	u16 num_q_vectors_mapped = irqmap_info->num_vectors;
	struct ice_pf *pf = vf->pf;
	struct ice_vsi *vsi;
	int idx;

	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
		goto bad_param;

	/* The number of VF vectors mapped must not be greater than the number
	 * of VF vectors originally allocated, and at least a single VF queue
	 * vector has to be mapped
	 */
	if (num_q_vectors_mapped > pf->num_msix_per_vf ||
	    !num_q_vectors_mapped)
		goto bad_param;

	vsi = pf->vsi[vf->lan_vsi_idx];
	if (!vsi)
		goto bad_param;

	for (idx = 0; idx < num_q_vectors_mapped; idx++) {
		struct virtchnl_vector_map *map = &irqmap_info->vecmap[idx];
		u16 vector_id = map->vector_id;
		u16 vsi_id = map->vsi_id;
		struct ice_q_vector *q_vector;

		/* vector_id is always 0-based for each VF, and can never be
		 * larger than or equal to the max allowed interrupts per VF
		 */
		if (vector_id >= pf->num_msix_per_vf)
			goto bad_param;

		if (!ice_vc_isvalid_vsi_id(vf, vsi_id))
			goto bad_param;

		if (!vector_id) {
			/* vector 0 must not carry any queue */
			if (map->rxq_map || map->txq_map)
				goto bad_param;

			/* No need to map VF miscellaneous or rogue vector */
			continue;
		}

		/* Subtract non queue vector from vector_id passed by VF
		 * to get actual number of VSI queue vector array index
		 */
		q_vector = vsi->q_vectors[vector_id - ICE_NONQ_VECS_VF];
		if (!q_vector)
			goto bad_param;

		/* lookout for the invalid queue index */
		v_ret = (enum virtchnl_status_code)
			ice_cfg_interrupt(vf, vsi, vector_id, map, q_vector);
		if (v_ret)
			goto send_reply;
	}

	goto send_reply;

bad_param:
	v_ret = VIRTCHNL_STATUS_ERR_PARAM;
send_reply:
	/* send the response to the VF */
	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_IRQ_MAP, v_ret,
				     NULL, 0);
}
/**
 * ice_vc_cfg_qs_msg - handle a VF request to configure its Rx/Tx queues
 * @vf: pointer to the VF info
 * @msg: pointer to the msg buffer, read as a
 *	 struct virtchnl_vsi_queue_config_info
 *
 * Called from the VF to configure the Rx/Tx queues. Each queue pair of the
 * request is validated and its ring address, ring length and Rx buffer
 * settings are copied into the VF's VSI; the queues are then programmed with
 * ice_vsi_cfg_lan_txqs() and ice_vsi_cfg_rxqs(). Validation happens pair by
 * pair, so values copied from pairs preceding a rejected one stay in the VSI.
 *
 * A VIRTCHNL_OP_CONFIG_VSI_QUEUES reply is always sent to the VF.
 *
 * Return: the value returned by ice_vc_send_msg_to_vf().
 */
static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
{
	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
	struct virtchnl_vsi_queue_config_info *qci =
	    (struct virtchnl_vsi_queue_config_info *)msg;
	struct virtchnl_queue_pair_info *qpi;
	u16 num_rxq = 0, num_txq = 0;
	struct ice_pf *pf = vf->pf;
	struct ice_vsi *vsi;
	int i;

	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
		goto error_param;
	}

	if (!ice_vc_isvalid_vsi_id(vf, qci->vsi_id)) {
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
		goto error_param;
	}

	vsi = pf->vsi[vf->lan_vsi_idx];
	if (!vsi) {
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
		goto error_param;
	}

	if (qci->num_queue_pairs > ICE_MAX_RSS_QS_PER_VF ||
	    qci->num_queue_pairs > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) {
		dev_err(ice_pf_to_dev(pf), "VF-%d requesting more than supported number of queues: %d\n",
			vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq));
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
		goto error_param;
	}

	for (i = 0; i < qci->num_queue_pairs; i++) {
		qpi = &qci->qpair[i];
		if (qpi->txq.vsi_id != qci->vsi_id ||
		    qpi->rxq.vsi_id != qci->vsi_id ||
		    qpi->rxq.queue_id != qpi->txq.queue_id ||
		    qpi->txq.headwb_enabled ||
		    !ice_vc_isvalid_ring_len(qpi->txq.ring_len) ||
		    !ice_vc_isvalid_ring_len(qpi->rxq.ring_len) ||
		    !ice_vc_isvalid_q_id(vf, qci->vsi_id, qpi->txq.queue_id)) {
			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
			goto error_param;
		}

		/* copy Tx queue info from VF into VSI */
		if (qpi->txq.ring_len > 0) {
			num_txq++;
			vsi->tx_rings[i]->dma = qpi->txq.dma_ring_addr;
			vsi->tx_rings[i]->count = qpi->txq.ring_len;
		}

		/* copy Rx queue info from VF into VSI */
		if (qpi->rxq.ring_len > 0) {
			num_rxq++;
			vsi->rx_rings[i]->dma = qpi->rxq.dma_ring_addr;
			vsi->rx_rings[i]->count = qpi->rxq.ring_len;

			/* a data buffer size of 0 is let through unchecked */
			if (qpi->rxq.databuffer_size != 0 &&
			    (qpi->rxq.databuffer_size > ((16 * 1024) - 128) ||
			     qpi->rxq.databuffer_size < 1024)) {
				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
				goto error_param;
			}
			vsi->rx_buf_len = qpi->rxq.databuffer_size;
			vsi->rx_rings[i]->rx_buf_len = vsi->rx_buf_len;

			if (qpi->rxq.max_pkt_size >= (16 * 1024) ||
			    qpi->rxq.max_pkt_size < 64) {
				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
				goto error_param;
			}

			/* only take over a max packet size that passed the
			 * range check above; an Rx queue with a ring length
			 * of 0 must not overwrite it with an unchecked value
			 */
			vsi->max_frame = qpi->rxq.max_pkt_size;
		}
	}

	/* VF can request to configure less than allocated queues
	 * or default allocated queues. So update the VSI with new number
	 */
	vsi->num_txq = num_txq;
	vsi->num_rxq = num_rxq;
	/* All queues of VF VSI are in TC 0 */
	vsi->tc_cfg.tc_info[0].qcount_tx = num_txq;
	vsi->tc_cfg.tc_info[0].qcount_rx = num_rxq;

	if (ice_vsi_cfg_lan_txqs(vsi) || ice_vsi_cfg_rxqs(vsi))
		v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;

error_param:
	/* send the response to the VF */
	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES, v_ret,
				     NULL, 0);
}
/**
* ice_is_vf_trusted
* @vf: pointer to the VF info
*/
static bool ice_is_vf_trusted(struct ice_vf *vf)
{
return test_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps);
}
/**
 * ice_can_vf_change_mac - check whether a VF may edit its MAC filters
 * @vf: pointer to the VF info
 *
 * Return: true if the VF is allowed to change its MAC filters, false
 * otherwise.
 */
static bool ice_can_vf_change_mac(struct ice_vf *vf)
{
	/* A MAC address set administratively (via the ndo_set_vf_mac command)
	 * takes the permission to add/delete unicast MAC addresses away from
	 * the VF, unless the VF is trusted
	 */
	return !vf->pf_set_mac || ice_is_vf_trusted(vf);
}
/**
 * ice_vc_add_mac_addr - attempt to add the MAC address passed in
 * @vf: pointer to the VF info
 * @vsi: pointer to the VF's VSI
 * @mac_addr: MAC address to add
 *
 * Return: 0 when the filter was added or @mac_addr already is the VF's
 * default LAN address, -EPERM when the VF may not change a unicast address,
 * -EEXIST when the filter already exists, -EIO on any other filter failure.
 */
static int
ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr)
{
	struct device *dev = ice_pf_to_dev(vf->pf);
	enum ice_status status;

	/* default unicast MAC already added */
	if (ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr))
		return 0;

	if (is_unicast_ether_addr(mac_addr) && !ice_can_vf_change_mac(vf)) {
		dev_err(dev, "VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n");
		return -EPERM;
	}

	status = ice_fltr_add_mac(vsi, mac_addr, ICE_FWD_TO_VSI);
	if (status == ICE_ERR_ALREADY_EXISTS) {
		dev_err(dev, "MAC %pM already exists for VF %d\n", mac_addr,
			vf->vf_id);
		return -EEXIST;
	} else if (status) {
		/* keep the whole report on one line of the log */
		dev_err(dev, "Failed to add MAC %pM for VF %d, error %s\n",
			mac_addr, vf->vf_id, ice_stat_str(status));
		return -EIO;
	}

	/* Set the default LAN address to the latest unicast MAC address added
	 * by the VF. The default LAN address is reported by the PF via
	 * ndo_get_vf_config.
	 */
	if (is_unicast_ether_addr(mac_addr))
		ether_addr_copy(vf->dflt_lan_addr.addr, mac_addr);

	vf->num_mac++;

	return 0;
}
/**
 * ice_vc_del_mac_addr - attempt to delete the MAC address passed in
 * @vf: pointer to the VF info
 * @vsi: pointer to the VF's VSI
 * @mac_addr: MAC address to delete
 *
 * Return: 0 when the filter was removed, or when the request names the VF's
 * default LAN address and the VF may not change its MAC filters (nothing is
 * removed in that case); -ENOENT when the filter does not exist, -EIO on any
 * other filter failure.
 */
static int
ice_vc_del_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr)
{
	struct device *dev = ice_pf_to_dev(vf->pf);
	enum ice_status status;

	/* quietly keep the default address of a VF that may not change it */
	if (!ice_can_vf_change_mac(vf) &&
	    ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr))
		return 0;

	status = ice_fltr_remove_mac(vsi, mac_addr, ICE_FWD_TO_VSI);

	if (status == ICE_ERR_DOES_NOT_EXIST) {
		dev_err(dev, "MAC %pM does not exist for VF %d\n", mac_addr,
			vf->vf_id);
		return -ENOENT;
	}

	if (status) {
		dev_err(dev, "Failed to delete MAC %pM for VF %d, error %s\n",
			mac_addr, vf->vf_id, ice_stat_str(status));
		return -EIO;
	}

	/* the default LAN address is cleared once its filter is gone */
	if (ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr))
		eth_zero_addr(vf->dflt_lan_addr.addr);

	vf->num_mac--;

	return 0;
}
/**
 * ice_vc_handle_mac_addr_msg - add or delete the MAC filters of a request
 * @vf: pointer to the VF info
 * @msg: pointer to the msg buffer, read as a struct virtchnl_ether_addr_list
 * @set: true if MAC filters are being set, false otherwise
 *
 * Runs ice_vc_add_mac_addr() (@set true) or ice_vc_del_mac_addr() (@set
 * false) on every address of the list. Broadcast and all-zero addresses are
 * skipped, and so are addresses for which the helper reports -EEXIST or
 * -ENOENT. Any other helper failure stops the processing.
 *
 * A reply with the matching opcode (VIRTCHNL_OP_ADD_ETH_ADDR or
 * VIRTCHNL_OP_DEL_ETH_ADDR) is always sent to the VF.
 *
 * Return: the value returned by ice_vc_send_msg_to_vf().
 */
static int
ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set)
{
	struct virtchnl_ether_addr_list *al =
		(struct virtchnl_ether_addr_list *)msg;
	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
	enum virtchnl_ops vc_op = set ? VIRTCHNL_OP_ADD_ETH_ADDR :
					VIRTCHNL_OP_DEL_ETH_ADDR;
	int (*cfg_mac)(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr) =
		set ? ice_vc_add_mac_addr : ice_vc_del_mac_addr;
	struct ice_pf *pf = vf->pf;
	struct ice_vsi *vsi;
	int idx;

	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
		goto send_reply;
	}

	if (!ice_vc_isvalid_vsi_id(vf, al->vsi_id)) {
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
		goto send_reply;
	}

	/* If this VF is not privileged, then we can't add more than a
	 * limited number of addresses. Check to make sure that the
	 * additions do not push us over the limit.
	 */
	if (set && !ice_is_vf_trusted(vf) &&
	    (vf->num_mac + al->num_elements) > ICE_MAX_MACADDR_PER_VF) {
		dev_err(ice_pf_to_dev(pf), "Can't add more MAC addresses, because VF-%d is not trusted, switch the VF to trusted mode in order to add more functionalities\n",
			vf->vf_id);
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
		goto send_reply;
	}

	vsi = pf->vsi[vf->lan_vsi_idx];
	if (!vsi) {
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
		goto send_reply;
	}

	for (idx = 0; idx < al->num_elements; idx++) {
		u8 *addr = al->list[idx].addr;
		int err;

		if (is_broadcast_ether_addr(addr) || is_zero_ether_addr(addr))
			continue;

		err = cfg_mac(vf, vsi, addr);

		/* an already present / already absent filter is not fatal */
		if (!err || err == -EEXIST || err == -ENOENT)
			continue;

		v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
		break;
	}

send_reply:
	/* send the response to the VF */
	return ice_vc_send_msg_to_vf(vf, vc_op, v_ret, NULL, 0);
}
/**
 * ice_vc_add_mac_addr_msg - handle a VF request to add MAC filters
 * @vf: pointer to the VF info
 * @msg: pointer to the msg buffer
 *
 * Forwards the message to ice_vc_handle_mac_addr_msg() in "set" mode and
 * returns its result.
 */
static int ice_vc_add_mac_addr_msg(struct ice_vf *vf, u8 *msg)
{
	const bool add_filters = true;

	return ice_vc_handle_mac_addr_msg(vf, msg, add_filters);
}
/**
 * ice_vc_del_mac_addr_msg - handle a VF request to remove MAC filters
 * @vf: pointer to the VF info
 * @msg: pointer to the msg buffer
 *
 * Forwards the message to ice_vc_handle_mac_addr_msg() with "set" cleared and
 * returns its result.
 */
static int ice_vc_del_mac_addr_msg(struct ice_vf *vf, u8 *msg)
{
	const bool add_filters = false;

	return ice_vc_handle_mac_addr_msg(vf, msg, add_filters);
}
/**
 * ice_vc_request_qs_msg - handle a VF request for a different queue count
 * @vf: pointer to the VF info
 * @msg: pointer to the msg buffer, read as a struct virtchnl_vf_res_request
 *
 * VFs get a default number of queues but can use this message to request a
 * different number. When the request can be honoured, the requested count is
 * recorded, the VF is reset and 0 is returned without sending a reply. In
 * every other case a virtchnl response is sent back; for a request that was
 * too large, the message's num_queue_pairs field is first overwritten with
 * the number of queue pairs the VF could have.
 */
static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg)
{
	struct virtchnl_vf_res_request *vfres =
		(struct virtchnl_vf_res_request *)msg;
	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
	u16 req_queues = vfres->num_queue_pairs;
	struct ice_pf *pf = vf->pf;
	struct device *dev = ice_pf_to_dev(pf);
	u16 avail_txq, avail_rxq;
	u16 queues_left;
	u16 cur_queues;

	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
		goto send_resp;
	}

	cur_queues = vf->num_vf_qs;
	avail_txq = ice_get_avail_txq_count(pf);
	avail_rxq = ice_get_avail_rxq_count(pf);
	/* a queue pair needs both a Tx and an Rx queue */
	queues_left = min_t(u16, avail_txq, avail_rxq);

	if (!req_queues) {
		dev_err(dev, "VF %d tried to request 0 queues. Ignoring.\n",
			vf->vf_id);
		goto send_resp;
	}

	if (req_queues > ICE_MAX_RSS_QS_PER_VF) {
		dev_err(dev, "VF %d tried to request more than %d queues.\n",
			vf->vf_id, ICE_MAX_RSS_QS_PER_VF);
		vfres->num_queue_pairs = ICE_MAX_RSS_QS_PER_VF;
		goto send_resp;
	}

	if (req_queues > cur_queues &&
	    req_queues - cur_queues > queues_left) {
		/* what the VF already has plus whatever is still free */
		u16 max_allowed = queues_left + cur_queues;

		dev_warn(dev, "VF %d requested %u more queues, but only %u left.\n",
			 vf->vf_id, req_queues - cur_queues, queues_left);
		vfres->num_queue_pairs = min_t(u16, max_allowed,
					       ICE_MAX_RSS_QS_PER_VF);
		goto send_resp;
	}

	/* request is successful, then reset VF */
	vf->num_req_qs = req_queues;
	ice_vc_reset_vf(vf);
	dev_info(dev, "VF %d granted request of %u queues.\n",
		 vf->vf_id, req_queues);

	return 0;

send_resp:
	/* send the response to the VF */
	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_REQUEST_QUEUES,
				     v_ret, (u8 *)vfres, sizeof(*vfres));
}
/**
 * ice_set_vf_port_vlan - program VF Port VLAN ID and/or QoS
 * @netdev: network interface device structure
 * @vf_id: VF identifier
 * @vlan_id: VLAN ID being set
 * @qos: priority setting
 * @vlan_proto: VLAN protocol
 *
 * Stores the requested port VLAN (the VLAN ID combined with the QoS priority
 * bits) on the VF and then resets the VF. A request that matches the value
 * already stored is treated as a no-op.
 *
 * Return: 0 on success or for a duplicate request, -EINVAL for an invalid VF
 * ID, VLAN ID or QoS value, -EPROTONOSUPPORT for any protocol other than
 * 802.1Q, or the error returned by ice_check_vf_ready_for_cfg().
 */
int
ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
		     __be16 vlan_proto)
{
	struct ice_pf *pf = ice_netdev_to_pf(netdev);
	struct device *dev;
	struct ice_vf *vf;
	u16 vlanprio;
	int ret;

	dev = ice_pf_to_dev(pf);
	if (ice_validate_vf_id(pf, vf_id))
		return -EINVAL;

	if (vlan_id >= VLAN_N_VID || qos > 7) {
		dev_err(dev, "Invalid Port VLAN parameters for VF %d, ID %d, QoS %d\n",
			vf_id, vlan_id, qos);
		return -EINVAL;
	}

	if (vlan_proto != htons(ETH_P_8021Q)) {
		dev_err(dev, "VF VLAN protocol is not supported\n");
		return -EPROTONOSUPPORT;
	}

	vf = &pf->vf[vf_id];

	/* Wait for any VF reset/removal in progress to finish and make sure
	 * the VF is enabled and initialized before it is reconfigured.
	 */
	ret = ice_check_vf_ready_for_cfg(vf);
	if (ret)
		return ret;

	vlanprio = vlan_id | (qos << VLAN_PRIO_SHIFT);

	if (vf->port_vlan_info == vlanprio) {
		/* duplicate request, so just return success */
		dev_dbg(dev, "Duplicate pvid %d request\n", vlanprio);
		return 0;
	}

	vf->port_vlan_info = vlanprio;

	/* a combined value of zero means the port VLAN is being removed */
	if (vf->port_vlan_info)
		dev_info(dev, "Setting VLAN %d, QoS 0x%x on VF %d\n",
			 vlan_id, qos, vf_id);
	else
		dev_info(dev, "Clearing port VLAN on VF %d\n", vf_id);

	ice_vc_reset_vf(vf);

	return 0;
}
/**
 * ice_vf_vlan_offload_ena - check whether the VF negotiated VLAN offloads
 * @caps: VF driver negotiated capabilities
 *
 * Return: true when the VIRTCHNL_VF_OFFLOAD_VLAN bit is set in @caps, false
 * otherwise.
 */
static bool ice_vf_vlan_offload_ena(u32 caps)
{
	return (caps & VIRTCHNL_VF_OFFLOAD_VLAN) != 0;
}
/**
* ice_vc_process_vlan_msg
* @vf: pointer to the VF info
* @msg: pointer to the msg buffer
* @add_v: Add VLAN if true, otherwise delete VLAN
*
* Process virtchnl op to add or remove programmed guest VLAN ID
*/
static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
{
enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
struct virtchnl_vlan_filter_list *vfl =
(struct virtchnl_vlan_filter_list *)msg;
struct ice_pf *pf = vf->pf;
bool vlan_promisc = false;
struct ice_vsi *vsi;
struct device *dev;
struct ice_hw *hw;
int status = 0;
u8 promisc_m;
int i;
dev = ice_pf_to_dev(pf);
if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
if (!ice_vf_vlan_offload_ena(vf->driver_caps)) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
if (!ice_vc_isvalid_vsi_id(vf, vfl->vsi_id)) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
for (i = 0; i < vfl->num_elements; i++) {
if (vfl->vlan_id[i] >= VLAN_N_VID) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
dev_err(dev, "invalid VF VLAN id %d\n",
vfl->vlan_id[i]);
goto error_param;
}
}
hw = &pf->hw;
vsi = pf->vsi[vf->lan_vsi_idx];
if (!vsi) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
ice: Fix VF spoofchk There are many things wrong with the function ice_set_vf_spoofchk(). 1. The VSI being modified is the PF VSI, not the VF VSI. 2. We are enabling Rx VLAN pruning instead of Tx VLAN anti-spoof. 3. The spoofchk setting for each VF is not initialized correctly or re-initialized correctly on reset. To fix [1] we need to make sure we are modifying the VF VSI. This is done by using the vf->lan_vsi_idx to index into the PF's VSI array. To fix [2] replace setting Rx VLAN pruning in ice_set_vf_spoofchk() with setting Tx VLAN anti-spoof. To Fix [3] we need to make sure the initial VSI settings match what is done in ice_set_vf_spoofchk() for spoofchk=on. Also make sure this also works for VF reset. This was done by modifying ice_vsi_init() to account for the current spoofchk state of the VF VSI. Because of these changes, Tx VLAN anti-spoof needs to be removed from ice_cfg_vlan_pruning(). This is okay for the VF because this is now controlled from the admin enabling/disabling spoofchk. For the PF, Tx VLAN anti-spoof should not be set. This change requires us to call ice_set_vf_spoofchk() when configuring promiscuous mode for the VF which requires ice_set_vf_spoofchk() to move in order to prevent a forward declaration prototype. Also, add VLAN 0 by default when allocating a VF since the PF is unaware if the guest OS is running the 8021q module. Without this, MDD events will trigger on untagged traffic because spoofcheck is enabled by default. Due to this change, ignore add/delete messages for VLAN 0 from VIRTCHNL since this is added/deleted during VF initialization/teardown respectively and should not be modified. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-12-12 11:12:54 +00:00
if (add_v && !ice_is_vf_trusted(vf) &&
vsi->num_vlan >= ICE_MAX_VLAN_PER_VF) {
dev_info(dev, "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n",
ice: Fix VF spoofchk There are many things wrong with the function ice_set_vf_spoofchk(). 1. The VSI being modified is the PF VSI, not the VF VSI. 2. We are enabling Rx VLAN pruning instead of Tx VLAN anti-spoof. 3. The spoofchk setting for each VF is not initialized correctly or re-initialized correctly on reset. To fix [1] we need to make sure we are modifying the VF VSI. This is done by using the vf->lan_vsi_idx to index into the PF's VSI array. To fix [2] replace setting Rx VLAN pruning in ice_set_vf_spoofchk() with setting Tx VLAN anti-spoof. To Fix [3] we need to make sure the initial VSI settings match what is done in ice_set_vf_spoofchk() for spoofchk=on. Also make sure this also works for VF reset. This was done by modifying ice_vsi_init() to account for the current spoofchk state of the VF VSI. Because of these changes, Tx VLAN anti-spoof needs to be removed from ice_cfg_vlan_pruning(). This is okay for the VF because this is now controlled from the admin enabling/disabling spoofchk. For the PF, Tx VLAN anti-spoof should not be set. This change requires us to call ice_set_vf_spoofchk() when configuring promiscuous mode for the VF which requires ice_set_vf_spoofchk() to move in order to prevent a forward declaration prototype. Also, add VLAN 0 by default when allocating a VF since the PF is unaware if the guest OS is running the 8021q module. Without this, MDD events will trigger on untagged traffic because spoofcheck is enabled by default. Due to this change, ignore add/delete messages for VLAN 0 from VIRTCHNL since this is added/deleted during VF initialization/teardown respectively and should not be modified. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-12-12 11:12:54 +00:00
vf->vf_id);
/* There is no need to let VF know about being not trusted,
* so we can just return success message here
*/
goto error_param;
}
if (vsi->info.pvid) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
if ((test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) ||
test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) &&
test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, pf->flags))
vlan_promisc = true;
if (add_v) {
for (i = 0; i < vfl->num_elements; i++) {
u16 vid = vfl->vlan_id[i];
if (!ice_is_vf_trusted(vf) &&
ice: Fix VF spoofchk There are many things wrong with the function ice_set_vf_spoofchk(). 1. The VSI being modified is the PF VSI, not the VF VSI. 2. We are enabling Rx VLAN pruning instead of Tx VLAN anti-spoof. 3. The spoofchk setting for each VF is not initialized correctly or re-initialized correctly on reset. To fix [1] we need to make sure we are modifying the VF VSI. This is done by using the vf->lan_vsi_idx to index into the PF's VSI array. To fix [2] replace setting Rx VLAN pruning in ice_set_vf_spoofchk() with setting Tx VLAN anti-spoof. To Fix [3] we need to make sure the initial VSI settings match what is done in ice_set_vf_spoofchk() for spoofchk=on. Also make sure this also works for VF reset. This was done by modifying ice_vsi_init() to account for the current spoofchk state of the VF VSI. Because of these changes, Tx VLAN anti-spoof needs to be removed from ice_cfg_vlan_pruning(). This is okay for the VF because this is now controlled from the admin enabling/disabling spoofchk. For the PF, Tx VLAN anti-spoof should not be set. This change requires us to call ice_set_vf_spoofchk() when configuring promiscuous mode for the VF which requires ice_set_vf_spoofchk() to move in order to prevent a forward declaration prototype. Also, add VLAN 0 by default when allocating a VF since the PF is unaware if the guest OS is running the 8021q module. Without this, MDD events will trigger on untagged traffic because spoofcheck is enabled by default. Due to this change, ignore add/delete messages for VLAN 0 from VIRTCHNL since this is added/deleted during VF initialization/teardown respectively and should not be modified. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-12-12 11:12:54 +00:00
vsi->num_vlan >= ICE_MAX_VLAN_PER_VF) {
dev_info(dev, "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n",
vf->vf_id);
/* There is no need to let VF know about being
* not trusted, so we can just return success
* message here as well.
*/
goto error_param;
}
ice: Fix VF spoofchk There are many things wrong with the function ice_set_vf_spoofchk(). 1. The VSI being modified is the PF VSI, not the VF VSI. 2. We are enabling Rx VLAN pruning instead of Tx VLAN anti-spoof. 3. The spoofchk setting for each VF is not initialized correctly or re-initialized correctly on reset. To fix [1] we need to make sure we are modifying the VF VSI. This is done by using the vf->lan_vsi_idx to index into the PF's VSI array. To fix [2] replace setting Rx VLAN pruning in ice_set_vf_spoofchk() with setting Tx VLAN anti-spoof. To Fix [3] we need to make sure the initial VSI settings match what is done in ice_set_vf_spoofchk() for spoofchk=on. Also make sure this also works for VF reset. This was done by modifying ice_vsi_init() to account for the current spoofchk state of the VF VSI. Because of these changes, Tx VLAN anti-spoof needs to be removed from ice_cfg_vlan_pruning(). This is okay for the VF because this is now controlled from the admin enabling/disabling spoofchk. For the PF, Tx VLAN anti-spoof should not be set. This change requires us to call ice_set_vf_spoofchk() when configuring promiscuous mode for the VF which requires ice_set_vf_spoofchk() to move in order to prevent a forward declaration prototype. Also, add VLAN 0 by default when allocating a VF since the PF is unaware if the guest OS is running the 8021q module. Without this, MDD events will trigger on untagged traffic because spoofcheck is enabled by default. Due to this change, ignore add/delete messages for VLAN 0 from VIRTCHNL since this is added/deleted during VF initialization/teardown respectively and should not be modified. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-12-12 11:12:54 +00:00
/* we add VLAN 0 by default for each VF so we can enable
* Tx VLAN anti-spoof without triggering MDD events so
* we don't need to add it again here
*/
if (!vid)
continue;
status = ice_vsi_add_vlan(vsi, vid, ICE_FWD_TO_VSI);
ice: Fix VF spoofchk There are many things wrong with the function ice_set_vf_spoofchk(). 1. The VSI being modified is the PF VSI, not the VF VSI. 2. We are enabling Rx VLAN pruning instead of Tx VLAN anti-spoof. 3. The spoofchk setting for each VF is not initialized correctly or re-initialized correctly on reset. To fix [1] we need to make sure we are modifying the VF VSI. This is done by using the vf->lan_vsi_idx to index into the PF's VSI array. To fix [2] replace setting Rx VLAN pruning in ice_set_vf_spoofchk() with setting Tx VLAN anti-spoof. To Fix [3] we need to make sure the initial VSI settings match what is done in ice_set_vf_spoofchk() for spoofchk=on. Also make sure this also works for VF reset. This was done by modifying ice_vsi_init() to account for the current spoofchk state of the VF VSI. Because of these changes, Tx VLAN anti-spoof needs to be removed from ice_cfg_vlan_pruning(). This is okay for the VF because this is now controlled from the admin enabling/disabling spoofchk. For the PF, Tx VLAN anti-spoof should not be set. This change requires us to call ice_set_vf_spoofchk() when configuring promiscuous mode for the VF which requires ice_set_vf_spoofchk() to move in order to prevent a forward declaration prototype. Also, add VLAN 0 by default when allocating a VF since the PF is unaware if the guest OS is running the 8021q module. Without this, MDD events will trigger on untagged traffic because spoofcheck is enabled by default. Due to this change, ignore add/delete messages for VLAN 0 from VIRTCHNL since this is added/deleted during VF initialization/teardown respectively and should not be modified. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-12-12 11:12:54 +00:00
if (status) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
/* Enable VLAN pruning when non-zero VLAN is added */
if (!vlan_promisc && vid &&
!ice_vsi_is_vlan_pruning_ena(vsi)) {
status = ice_cfg_vlan_pruning(vsi, true, false);
if (status) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
dev_err(dev, "Enable VLAN pruning on VLAN ID: %d failed error-%d\n",
vid, status);
goto error_param;
}
} else if (vlan_promisc) {
/* Enable Ucast/Mcast VLAN promiscuous mode */
promisc_m = ICE_PROMISC_VLAN_TX |
ICE_PROMISC_VLAN_RX;
status = ice_set_vsi_promisc(hw, vsi->idx,
promisc_m, vid);
if (status) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
dev_err(dev, "Enable Unicast/multicast promiscuous mode on VLAN ID:%d failed error-%d\n",
vid, status);
}
}
}
} else {
/* In case of non_trusted VF, number of VLAN elements passed
* to PF for removal might be greater than number of VLANs
* filter programmed for that VF - So, use actual number of
* VLANS added earlier with add VLAN opcode. In order to avoid
* removing VLAN that doesn't exist, which result to sending
* erroneous failed message back to the VF
*/
int num_vf_vlan;
ice: Fix VF spoofchk There are many things wrong with the function ice_set_vf_spoofchk(). 1. The VSI being modified is the PF VSI, not the VF VSI. 2. We are enabling Rx VLAN pruning instead of Tx VLAN anti-spoof. 3. The spoofchk setting for each VF is not initialized correctly or re-initialized correctly on reset. To fix [1] we need to make sure we are modifying the VF VSI. This is done by using the vf->lan_vsi_idx to index into the PF's VSI array. To fix [2] replace setting Rx VLAN pruning in ice_set_vf_spoofchk() with setting Tx VLAN anti-spoof. To Fix [3] we need to make sure the initial VSI settings match what is done in ice_set_vf_spoofchk() for spoofchk=on. Also make sure this also works for VF reset. This was done by modifying ice_vsi_init() to account for the current spoofchk state of the VF VSI. Because of these changes, Tx VLAN anti-spoof needs to be removed from ice_cfg_vlan_pruning(). This is okay for the VF because this is now controlled from the admin enabling/disabling spoofchk. For the PF, Tx VLAN anti-spoof should not be set. This change requires us to call ice_set_vf_spoofchk() when configuring promiscuous mode for the VF which requires ice_set_vf_spoofchk() to move in order to prevent a forward declaration prototype. Also, add VLAN 0 by default when allocating a VF since the PF is unaware if the guest OS is running the 8021q module. Without this, MDD events will trigger on untagged traffic because spoofcheck is enabled by default. Due to this change, ignore add/delete messages for VLAN 0 from VIRTCHNL since this is added/deleted during VF initialization/teardown respectively and should not be modified. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-12-12 11:12:54 +00:00
num_vf_vlan = vsi->num_vlan;
for (i = 0; i < vfl->num_elements && i < num_vf_vlan; i++) {
u16 vid = vfl->vlan_id[i];
ice: Fix VF spoofchk There are many things wrong with the function ice_set_vf_spoofchk(). 1. The VSI being modified is the PF VSI, not the VF VSI. 2. We are enabling Rx VLAN pruning instead of Tx VLAN anti-spoof. 3. The spoofchk setting for each VF is not initialized correctly or re-initialized correctly on reset. To fix [1] we need to make sure we are modifying the VF VSI. This is done by using the vf->lan_vsi_idx to index into the PF's VSI array. To fix [2] replace setting Rx VLAN pruning in ice_set_vf_spoofchk() with setting Tx VLAN anti-spoof. To Fix [3] we need to make sure the initial VSI settings match what is done in ice_set_vf_spoofchk() for spoofchk=on. Also make sure this also works for VF reset. This was done by modifying ice_vsi_init() to account for the current spoofchk state of the VF VSI. Because of these changes, Tx VLAN anti-spoof needs to be removed from ice_cfg_vlan_pruning(). This is okay for the VF because this is now controlled from the admin enabling/disabling spoofchk. For the PF, Tx VLAN anti-spoof should not be set. This change requires us to call ice_set_vf_spoofchk() when configuring promiscuous mode for the VF which requires ice_set_vf_spoofchk() to move in order to prevent a forward declaration prototype. Also, add VLAN 0 by default when allocating a VF since the PF is unaware if the guest OS is running the 8021q module. Without this, MDD events will trigger on untagged traffic because spoofcheck is enabled by default. Due to this change, ignore add/delete messages for VLAN 0 from VIRTCHNL since this is added/deleted during VF initialization/teardown respectively and should not be modified. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-12-12 11:12:54 +00:00
/* we add VLAN 0 by default for each VF so we can enable
* Tx VLAN anti-spoof without triggering MDD events so
* we don't want a VIRTCHNL request to remove it
*/
if (!vid)
continue;
/* Make sure ice_vsi_kill_vlan is successful before
* updating VLAN information
*/
ice: Fix VF spoofchk There are many things wrong with the function ice_set_vf_spoofchk(). 1. The VSI being modified is the PF VSI, not the VF VSI. 2. We are enabling Rx VLAN pruning instead of Tx VLAN anti-spoof. 3. The spoofchk setting for each VF is not initialized correctly or re-initialized correctly on reset. To fix [1] we need to make sure we are modifying the VF VSI. This is done by using the vf->lan_vsi_idx to index into the PF's VSI array. To fix [2] replace setting Rx VLAN pruning in ice_set_vf_spoofchk() with setting Tx VLAN anti-spoof. To Fix [3] we need to make sure the initial VSI settings match what is done in ice_set_vf_spoofchk() for spoofchk=on. Also make sure this also works for VF reset. This was done by modifying ice_vsi_init() to account for the current spoofchk state of the VF VSI. Because of these changes, Tx VLAN anti-spoof needs to be removed from ice_cfg_vlan_pruning(). This is okay for the VF because this is now controlled from the admin enabling/disabling spoofchk. For the PF, Tx VLAN anti-spoof should not be set. This change requires us to call ice_set_vf_spoofchk() when configuring promiscuous mode for the VF which requires ice_set_vf_spoofchk() to move in order to prevent a forward declaration prototype. Also, add VLAN 0 by default when allocating a VF since the PF is unaware if the guest OS is running the 8021q module. Without this, MDD events will trigger on untagged traffic because spoofcheck is enabled by default. Due to this change, ignore add/delete messages for VLAN 0 from VIRTCHNL since this is added/deleted during VF initialization/teardown respectively and should not be modified. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-12-12 11:12:54 +00:00
status = ice_vsi_kill_vlan(vsi, vid);
if (status) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
/* Disable VLAN pruning when only VLAN 0 is left */
if (vsi->num_vlan == 1 &&
ice_vsi_is_vlan_pruning_ena(vsi))
ice_cfg_vlan_pruning(vsi, false, false);
/* Disable Unicast/Multicast VLAN promiscuous mode */
if (vlan_promisc) {
promisc_m = ICE_PROMISC_VLAN_TX |
ICE_PROMISC_VLAN_RX;
ice_clear_vsi_promisc(hw, vsi->idx,
promisc_m, vid);
}
}
}
error_param:
/* send the response to the VF */
if (add_v)
return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_VLAN, v_ret,
NULL, 0);
else
return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DEL_VLAN, v_ret,
NULL, 0);
}
/**
 * ice_vc_add_vlan_msg
 * @vf: pointer to the VF info
 * @msg: pointer to the msg buffer
 *
 * Add and program guest VLAN ID. Forwards the request unchanged to
 * ice_vc_process_vlan_msg() with the add flag set to true and returns
 * whatever that call returns.
 */
static int ice_vc_add_vlan_msg(struct ice_vf *vf, u8 *msg)
{
return ice_vc_process_vlan_msg(vf, msg, true);
}
/**
 * ice_vc_remove_vlan_msg
 * @vf: pointer to the VF info
 * @msg: pointer to the msg buffer
 *
 * Remove programmed guest VLAN ID. Forwards the request unchanged to
 * ice_vc_process_vlan_msg() with the add flag set to false and returns
 * whatever that call returns.
 */
static int ice_vc_remove_vlan_msg(struct ice_vf *vf, u8 *msg)
{
return ice_vc_process_vlan_msg(vf, msg, false);
}
/**
 * ice_vc_ena_vlan_stripping
 * @vf: pointer to the VF info
 *
 * Enable VLAN header stripping for a given VF. The request is rejected with
 * VIRTCHNL_STATUS_ERR_PARAM when the VF is not in the active state, when the
 * VF did not negotiate VLAN offload, when the VF has no VSI, or when
 * ice_vsi_manage_vlan_stripping() fails. The outcome is always reported to
 * the VF in a VIRTCHNL_OP_ENABLE_VLAN_STRIPPING response.
 *
 * Return: the result of ice_vc_send_msg_to_vf() for that response
 */
static int ice_vc_ena_vlan_stripping(struct ice_vf *vf)
{
	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
	struct ice_pf *pf = vf->pf;
	struct ice_vsi *vsi;

	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
		goto error_param;
	}

	if (!ice_vf_vlan_offload_ena(vf->driver_caps)) {
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
		goto error_param;
	}

	vsi = pf->vsi[vf->lan_vsi_idx];
	/* the slot in the PF's VSI array may be empty; the disable path
	 * (ice_vc_dis_vlan_stripping) already rejects that case, so do the
	 * same here instead of handing a NULL VSI to the helper
	 */
	if (!vsi) {
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
		goto error_param;
	}

	if (ice_vsi_manage_vlan_stripping(vsi, true))
		v_ret = VIRTCHNL_STATUS_ERR_PARAM;

error_param:
	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING,
				     v_ret, NULL, 0);
}
/**
* ice_vc_dis_vlan_stripping
* @vf: pointer to the VF info
*
* Disable VLAN header stripping for a given VF
*/
static int ice_vc_dis_vlan_stripping(struct ice_vf *vf)
{
enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
struct ice_pf *pf = vf->pf;
struct ice_vsi *vsi;
if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
if (!ice_vf_vlan_offload_ena(vf->driver_caps)) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
vsi = pf->vsi[vf->lan_vsi_idx];
if (!vsi) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
if (ice_vsi_manage_vlan_stripping(vsi, false))
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
error_param:
return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING,
v_ret, NULL, 0);
}
/**
 * ice_vf_init_vlan_stripping - enable/disable VLAN stripping on initialization
 * @vf: VF to enable/disable VLAN stripping for on initialization
 *
 * Stripping is enabled when ice_vf_vlan_offload_ena() reports that the VF's
 * driver capabilities include VLAN offload and disabled otherwise. For
 * example, the capability will be cleared when port VLANs are configured by
 * the administrator before passing the VF to the guest or if the AVF driver
 * doesn't support VLAN offloads. A VSI that already has a port VLAN (non-zero
 * pvid) is left untouched.
 *
 * Return: -EINVAL if the VF has no VSI, 0 if a port VLAN is configured,
 * otherwise the result of ice_vsi_manage_vlan_stripping()
 */
static int ice_vf_init_vlan_stripping(struct ice_vf *vf)
{
	struct ice_vsi *vf_vsi = vf->pf->vsi[vf->lan_vsi_idx];

	if (!vf_vsi)
		return -EINVAL;

	/* don't modify stripping if port VLAN is configured */
	if (vf_vsi->info.pvid)
		return 0;

	return ice_vsi_manage_vlan_stripping(vf_vsi,
			ice_vf_vlan_offload_ena(vf->driver_caps) ? true : false);
}
/**
 * ice_vc_process_vf_msg - Process request from VF
 * @pf: pointer to the PF structure
 * @event: pointer to the AQ event
 *
 * called from the common asq/arq handler to
 * process request from VF
 *
 * The VF ID, opcode and payload are taken from @event. The message is
 * dropped (with an error reply to the VF when the VF could be identified)
 * if the VF ID is invalid, the VF is disabled, or
 * virtchnl_vc_validate_vf_msg() rejects it. Otherwise it is dispatched to
 * the handler for its opcode; unknown opcodes are answered with
 * VIRTCHNL_STATUS_ERR_NOT_SUPPORTED.
 */
void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event)
{
	u32 v_opcode = le32_to_cpu(event->desc.cookie_high);
	/* kept unsigned: the descriptor field is 16 bits wide and
	 * ice_validate_vf_id() takes a u16, so a signed local would only
	 * make large IDs show up as negative numbers in the logs
	 */
	u16 vf_id = le16_to_cpu(event->desc.retval);
	u16 msglen = event->msg_len;
	u8 *msg = event->msg_buf;
	struct ice_vf *vf = NULL;
	struct device *dev;
	int err = 0;

	dev = ice_pf_to_dev(pf);
	if (ice_validate_vf_id(pf, vf_id)) {
		err = -EINVAL;
		goto error_handler;
	}

	vf = &pf->vf[vf_id];

	/* Check if VF is disabled. */
	if (test_bit(ICE_VF_STATE_DIS, vf->vf_states)) {
		err = -EPERM;
		goto error_handler;
	}

	/* Perform basic checks on the msg */
	err = virtchnl_vc_validate_vf_msg(&vf->vf_ver, v_opcode, msg, msglen);
	if (err) {
		/* translate the virtchnl status into an errno for logging */
		if (err == VIRTCHNL_STATUS_ERR_PARAM)
			err = -EPERM;
		else
			err = -EINVAL;
	}

error_handler:
	if (err) {
		/* vf is still NULL when the VF ID itself was rejected, in
		 * which case there is no VF to send the error reply to
		 */
		if (vf)
			ice_vc_send_msg_to_vf(vf, v_opcode,
					      VIRTCHNL_STATUS_ERR_PARAM,
					      NULL, 0);
		dev_err(dev, "Invalid message from VF %d, opcode %d, len %d, error %d\n",
			vf_id, v_opcode, msglen, err);
		return;
	}

	switch (v_opcode) {
	case VIRTCHNL_OP_VERSION:
		err = ice_vc_get_ver_msg(vf, msg);
		break;
	case VIRTCHNL_OP_GET_VF_RESOURCES:
		err = ice_vc_get_vf_res_msg(vf, msg);
		if (ice_vf_init_vlan_stripping(vf))
			dev_err(dev, "Failed to initialize VLAN stripping for VF %d\n",
				vf->vf_id);
		ice_vc_notify_vf_link_state(vf);
		break;
	case VIRTCHNL_OP_RESET_VF:
		ice_vc_reset_vf_msg(vf);
		break;
	case VIRTCHNL_OP_ADD_ETH_ADDR:
		err = ice_vc_add_mac_addr_msg(vf, msg);
		break;
	case VIRTCHNL_OP_DEL_ETH_ADDR:
		err = ice_vc_del_mac_addr_msg(vf, msg);
		break;
	case VIRTCHNL_OP_CONFIG_VSI_QUEUES:
		err = ice_vc_cfg_qs_msg(vf, msg);
		break;
	case VIRTCHNL_OP_ENABLE_QUEUES:
		err = ice_vc_ena_qs_msg(vf, msg);
		ice_vc_notify_vf_link_state(vf);
		break;
	case VIRTCHNL_OP_DISABLE_QUEUES:
		err = ice_vc_dis_qs_msg(vf, msg);
		break;
	case VIRTCHNL_OP_REQUEST_QUEUES:
		err = ice_vc_request_qs_msg(vf, msg);
		break;
	case VIRTCHNL_OP_CONFIG_IRQ_MAP:
		err = ice_vc_cfg_irq_map_msg(vf, msg);
		break;
	case VIRTCHNL_OP_CONFIG_RSS_KEY:
		err = ice_vc_config_rss_key(vf, msg);
		break;
	case VIRTCHNL_OP_CONFIG_RSS_LUT:
		err = ice_vc_config_rss_lut(vf, msg);
		break;
	case VIRTCHNL_OP_GET_STATS:
		err = ice_vc_get_stats_msg(vf, msg);
		break;
	case VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE:
		err = ice_vc_cfg_promiscuous_mode_msg(vf, msg);
		break;
	case VIRTCHNL_OP_ADD_VLAN:
		err = ice_vc_add_vlan_msg(vf, msg);
		break;
	case VIRTCHNL_OP_DEL_VLAN:
		err = ice_vc_remove_vlan_msg(vf, msg);
		break;
	case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
		err = ice_vc_ena_vlan_stripping(vf);
		break;
	case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING:
		err = ice_vc_dis_vlan_stripping(vf);
		break;
	case VIRTCHNL_OP_UNKNOWN:
	default:
		dev_err(dev, "Unsupported opcode %d from VF %d\n", v_opcode,
			vf_id);
		err = ice_vc_send_msg_to_vf(vf, v_opcode,
					    VIRTCHNL_STATUS_ERR_NOT_SUPPORTED,
					    NULL, 0);
		break;
	}
	if (err) {
		/* Helper function cares less about error return values here
		 * as it is busy with pending work.
		 */
		dev_info(dev, "PF failed to honor VF %d, opcode %d, error %d\n",
			 vf_id, v_opcode, err);
	}
}
/**
 * ice_get_vf_cfg
 * @netdev: network interface device structure
 * @vf_id: VF identifier
 * @ivi: VF configuration structure
 *
 * Fill @ivi with the VF's MAC address, port VLAN and QoS, trust and
 * spoof-check settings, link state and Tx rate.
 *
 * Return: 0 on success, -EINVAL for an invalid @vf_id, -EBUSY if the VF
 * has not finished initialization
 */
int
ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi)
{
	struct ice_pf *pf = ice_netdev_to_pf(netdev);
	struct ice_vf *vf;

	if (ice_validate_vf_id(pf, vf_id))
		return -EINVAL;

	/* ice_validate_vf_id() takes a u16, so an int vf_id outside
	 * 0..65535 is truncated before it is checked and may pass. Re-check
	 * the untruncated value before using it as an array index.
	 */
	if (vf_id < 0 || vf_id >= pf->num_alloc_vfs)
		return -EINVAL;

	vf = &pf->vf[vf_id];

	if (ice_check_vf_init(pf, vf))
		return -EBUSY;

	ivi->vf = vf_id;
	ether_addr_copy(ivi->mac, vf->dflt_lan_addr.addr);

	/* VF configuration for VLAN and applicable QoS */
	ivi->vlan = vf->port_vlan_info & VLAN_VID_MASK;
	ivi->qos = (vf->port_vlan_info & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;

	ivi->trusted = vf->trusted;
	ivi->spoofchk = vf->spoofchk;

	/* a forced link state overrides automatic link reporting */
	if (!vf->link_forced)
		ivi->linkstate = IFLA_VF_LINK_STATE_AUTO;
	else if (vf->link_up)
		ivi->linkstate = IFLA_VF_LINK_STATE_ENABLE;
	else
		ivi->linkstate = IFLA_VF_LINK_STATE_DISABLE;

	ivi->max_tx_rate = vf->tx_rate;
	ivi->min_tx_rate = 0;
	return 0;
}
/**
 * ice_unicast_mac_exists - check if the unicast MAC exists on the PF's switch
 * @pf: PF used to reference the switch's rules
 * @umac: unicast MAC to compare against existing switch rules
 *
 * Walks the MAC lookup recipe's filter rule list while holding that list's
 * lock.
 *
 * Return true on the first/any match, else return false
 */
static bool ice_unicast_mac_exists(struct ice_pf *pf, u8 *umac)
{
	struct ice_sw_recipe *recipe =
		&pf->hw.switch_info->recp_list[ICE_SW_LKUP_MAC];
	struct ice_fltr_mgmt_list_entry *entry;
	bool found = false;

	/* the lock protects MAC filter list access */
	mutex_lock(&recipe->filt_rule_lock);
	list_for_each_entry(entry, &recipe->filt_rules, list_entry) {
		if (ether_addr_equal(&entry->fltr_info.l_data.mac.mac_addr[0],
				     umac)) {
			found = true;
			break;
		}
	}
	mutex_unlock(&recipe->filt_rule_lock);

	return found;
}
/**
 * ice_set_vf_mac
 * @netdev: network interface device structure
 * @vf_id: VF identifier
 * @mac: MAC address
 *
 * program VF MAC address
 *
 * Records @mac as the VF's default LAN address and resets the VF. A zero
 * address clears the PF-assigned MAC. Nothing is done when @mac already
 * matches the VF's current address.
 *
 * Return: 0 on success, -EINVAL for an invalid @vf_id, a multicast @mac or
 * a unicast @mac that already exists on this PF, or the error returned by
 * ice_check_vf_ready_for_cfg()
 */
int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
{
	struct ice_pf *pf = ice_netdev_to_pf(netdev);
	struct ice_vf *vf;
	int ret;

	if (ice_validate_vf_id(pf, vf_id))
		return -EINVAL;

	/* ice_validate_vf_id() takes a u16, so an int vf_id outside
	 * 0..65535 is truncated before it is checked and may pass. Re-check
	 * the untruncated value before using it as an array index.
	 */
	if (vf_id < 0 || vf_id >= pf->num_alloc_vfs)
		return -EINVAL;

	if (is_multicast_ether_addr(mac)) {
		netdev_err(netdev, "%pM not a valid unicast address\n", mac);
		return -EINVAL;
	}

	vf = &pf->vf[vf_id];
	/* nothing left to do, unicast MAC already set */
	if (ether_addr_equal(vf->dflt_lan_addr.addr, mac))
		return 0;

	ret = ice_check_vf_ready_for_cfg(vf);
	if (ret)
		return ret;

	if (ice_unicast_mac_exists(pf, mac)) {
		netdev_err(netdev, "Unicast MAC %pM already exists on this PF. Preventing setting VF %u unicast MAC address to %pM\n",
			   mac, vf_id, mac);
		return -EINVAL;
	}

	/* VF is notified of its new MAC via the PF's response to the
	 * VIRTCHNL_OP_GET_VF_RESOURCES message after the VF has been reset
	 */
	ether_addr_copy(vf->dflt_lan_addr.addr, mac);
	if (is_zero_ether_addr(mac)) {
		/* VF will send VIRTCHNL_OP_ADD_ETH_ADDR message with its MAC */
		vf->pf_set_mac = false;
		netdev_info(netdev, "Removing MAC on VF %d. VF driver will be reinitialized\n",
			    vf->vf_id);
	} else {
		/* PF will add MAC rule for the VF */
		vf->pf_set_mac = true;
		netdev_info(netdev, "Setting MAC %pM on VF %d. VF driver will be reinitialized\n",
			    mac, vf_id);
	}

	ice_vc_reset_vf(vf);
	return 0;
}
/**
 * ice_set_vf_trust
 * @netdev: network interface device structure
 * @vf_id: VF identifier
 * @trusted: Boolean value to enable/disable trusted VF
 *
 * Enable or disable a given VF as trusted. The VF is reset when its trust
 * setting actually changes; a request matching the current setting is a
 * no-op.
 *
 * Return: 0 on success, -EINVAL for an invalid @vf_id, or the error returned
 * by ice_check_vf_ready_for_cfg()
 */
int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
{
	struct ice_pf *pf = ice_netdev_to_pf(netdev);
	struct ice_vf *vf;
	int ret;

	if (ice_validate_vf_id(pf, vf_id))
		return -EINVAL;

	/* ice_validate_vf_id() takes a u16, so an int vf_id outside
	 * 0..65535 is truncated before it is checked and may pass. Re-check
	 * the untruncated value before using it as an array index.
	 */
	if (vf_id < 0 || vf_id >= pf->num_alloc_vfs)
		return -EINVAL;

	vf = &pf->vf[vf_id];
	ret = ice_check_vf_ready_for_cfg(vf);
	if (ret)
		return ret;

	/* Check if already trusted */
	if (trusted == vf->trusted)
		return 0;

	vf->trusted = trusted;
	ice_vc_reset_vf(vf);
	dev_info(ice_pf_to_dev(pf), "VF %u is now %strusted\n",
		 vf_id, trusted ? "" : "un");

	return 0;
}
/**
* ice_set_vf_link_state
* @netdev: network interface device structure
* @vf_id: VF identifier
* @link_state: required link state
*
* Set VF's link state, irrespective of physical link state status
*/
int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state)
{
struct ice_pf *pf = ice_netdev_to_pf(netdev);
struct ice_vf *vf;
ice: Wait for VF to be reset/ready before configuration The configuration/command below is failing when the VF in the xml file is already bound to the host iavf driver. pci_0000_af_0_0.xml: <interface type='hostdev' managed='yes'> <source> <address type='pci' domain='0x0000' bus='0xaf' slot='0x0' function='0x0'/> </source> <mac address='00:de:ad:00:11:01'/> </interface> > virsh attach-device domain_name pci_0000_af_0_0.xml error: Failed to attach device from pci_0000_af_0_0.xml error: Cannot set interface MAC/vlanid to 00:de:ad:00:11:01/0 for ifname ens1f1 vf 0: Device or resource busy This is failing because the VF has not been completely removed/reset after being unbound (via the virsh command above) from the host iavf driver and ice_set_vf_mac() checks if the VF is disabled before waiting for the reset to finish. Fix this by waiting for the VF remove/reset process to happen before checking if the VF is disabled. Also, since many functions for VF administration on the PF were more or less calling the same 3 functions (ice_wait_on_vf_reset(), ice_is_vf_disabled(), and ice_check_vf_init()) move these into the helper function ice_check_vf_ready_for_cfg(). Then call this function in any flow that attempts to configure/query a VF from the PF. Lastly, increase the maximum wait time in ice_wait_on_vf_reset() to 800ms, and modify/add the #define(s) that determine the wait time. This was done for robustness because in rare/stress cases VF removal can take a max of ~800ms and previously the wait was a max of ~300ms. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2020-02-18 21:22:06 +00:00
int ret;
if (ice_validate_vf_id(pf, vf_id))
return -EINVAL;
vf = &pf->vf[vf_id];
ice: Wait for VF to be reset/ready before configuration The configuration/command below is failing when the VF in the xml file is already bound to the host iavf driver. pci_0000_af_0_0.xml: <interface type='hostdev' managed='yes'> <source> <address type='pci' domain='0x0000' bus='0xaf' slot='0x0' function='0x0'/> </source> <mac address='00:de:ad:00:11:01'/> </interface> > virsh attach-device domain_name pci_0000_af_0_0.xml error: Failed to attach device from pci_0000_af_0_0.xml error: Cannot set interface MAC/vlanid to 00:de:ad:00:11:01/0 for ifname ens1f1 vf 0: Device or resource busy This is failing because the VF has not been completely removed/reset after being unbound (via the virsh command above) from the host iavf driver and ice_set_vf_mac() checks if the VF is disabled before waiting for the reset to finish. Fix this by waiting for the VF remove/reset process to happen before checking if the VF is disabled. Also, since many functions for VF administration on the PF were more or less calling the same 3 functions (ice_wait_on_vf_reset(), ice_is_vf_disabled(), and ice_check_vf_init()) move these into the helper function ice_check_vf_ready_for_cfg(). Then call this function in any flow that attempts to configure/query a VF from the PF. Lastly, increase the maximum wait time in ice_wait_on_vf_reset() to 800ms, and modify/add the #define(s) that determine the wait time. This was done for robustness because in rare/stress cases VF removal can take a max of ~800ms and previously the wait was a max of ~300ms. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2020-02-18 21:22:06 +00:00
ret = ice_check_vf_ready_for_cfg(vf);
if (ret)
return ret;
switch (link_state) {
case IFLA_VF_LINK_STATE_AUTO:
vf->link_forced = false;
break;
case IFLA_VF_LINK_STATE_ENABLE:
vf->link_forced = true;
vf->link_up = true;
break;
case IFLA_VF_LINK_STATE_DISABLE:
vf->link_forced = true;
vf->link_up = false;
break;
default:
return -EINVAL;
}
ice_vc_notify_vf_link_state(vf);
return 0;
}
/**
* ice_get_vf_stats - populate some stats for the VF
* @netdev: the netdev of the PF
* @vf_id: the host OS identifier (0-255)
* @vf_stats: pointer to the OS memory to be initialized
*/
int ice_get_vf_stats(struct net_device *netdev, int vf_id,
struct ifla_vf_stats *vf_stats)
{
struct ice_pf *pf = ice_netdev_to_pf(netdev);
struct ice_eth_stats *stats;
struct ice_vsi *vsi;
struct ice_vf *vf;
ice: Wait for VF to be reset/ready before configuration The configuration/command below is failing when the VF in the xml file is already bound to the host iavf driver. pci_0000_af_0_0.xml: <interface type='hostdev' managed='yes'> <source> <address type='pci' domain='0x0000' bus='0xaf' slot='0x0' function='0x0'/> </source> <mac address='00:de:ad:00:11:01'/> </interface> > virsh attach-device domain_name pci_0000_af_0_0.xml error: Failed to attach device from pci_0000_af_0_0.xml error: Cannot set interface MAC/vlanid to 00:de:ad:00:11:01/0 for ifname ens1f1 vf 0: Device or resource busy This is failing because the VF has not been completely removed/reset after being unbound (via the virsh command above) from the host iavf driver and ice_set_vf_mac() checks if the VF is disabled before waiting for the reset to finish. Fix this by waiting for the VF remove/reset process to happen before checking if the VF is disabled. Also, since many functions for VF administration on the PF were more or less calling the same 3 functions (ice_wait_on_vf_reset(), ice_is_vf_disabled(), and ice_check_vf_init()) move these into the helper function ice_check_vf_ready_for_cfg(). Then call this function in any flow that attempts to configure/query a VF from the PF. Lastly, increase the maximum wait time in ice_wait_on_vf_reset() to 800ms, and modify/add the #define(s) that determine the wait time. This was done for robustness because in rare/stress cases VF removal can take a max of ~800ms and previously the wait was a max of ~300ms. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2020-02-18 21:22:06 +00:00
int ret;
if (ice_validate_vf_id(pf, vf_id))
return -EINVAL;
vf = &pf->vf[vf_id];
ice: Wait for VF to be reset/ready before configuration The configuration/command below is failing when the VF in the xml file is already bound to the host iavf driver. pci_0000_af_0_0.xml: <interface type='hostdev' managed='yes'> <source> <address type='pci' domain='0x0000' bus='0xaf' slot='0x0' function='0x0'/> </source> <mac address='00:de:ad:00:11:01'/> </interface> > virsh attach-device domain_name pci_0000_af_0_0.xml error: Failed to attach device from pci_0000_af_0_0.xml error: Cannot set interface MAC/vlanid to 00:de:ad:00:11:01/0 for ifname ens1f1 vf 0: Device or resource busy This is failing because the VF has not been completely removed/reset after being unbound (via the virsh command above) from the host iavf driver and ice_set_vf_mac() checks if the VF is disabled before waiting for the reset to finish. Fix this by waiting for the VF remove/reset process to happen before checking if the VF is disabled. Also, since many functions for VF administration on the PF were more or less calling the same 3 functions (ice_wait_on_vf_reset(), ice_is_vf_disabled(), and ice_check_vf_init()) move these into the helper function ice_check_vf_ready_for_cfg(). Then call this function in any flow that attempts to configure/query a VF from the PF. Lastly, increase the maximum wait time in ice_wait_on_vf_reset() to 800ms, and modify/add the #define(s) that determine the wait time. This was done for robustness because in rare/stress cases VF removal can take a max of ~800ms and previously the wait was a max of ~300ms. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2020-02-18 21:22:06 +00:00
ret = ice_check_vf_ready_for_cfg(vf);
if (ret)
return ret;
vsi = pf->vsi[vf->lan_vsi_idx];
if (!vsi)
return -EINVAL;
ice_update_eth_stats(vsi);
stats = &vsi->eth_stats;
memset(vf_stats, 0, sizeof(*vf_stats));
vf_stats->rx_packets = stats->rx_unicast + stats->rx_broadcast +
stats->rx_multicast;
vf_stats->tx_packets = stats->tx_unicast + stats->tx_broadcast +
stats->tx_multicast;
vf_stats->rx_bytes = stats->rx_bytes;
vf_stats->tx_bytes = stats->tx_bytes;
vf_stats->broadcast = stats->rx_broadcast;
vf_stats->multicast = stats->rx_multicast;
vf_stats->rx_dropped = stats->rx_discards;
vf_stats->tx_dropped = stats->tx_discards;
return 0;
}
/**
 * ice_print_vf_rx_mdd_event - log the Rx malicious driver detect count of a VF
 * @vf: pointer to the VF structure
 *
 * Emits one informational message with the VF's Rx MDD event count, the PF
 * and VF IDs, the VF's default LAN MAC address and whether the
 * ICE_FLAG_MDD_AUTO_RESET_VF flag is set on the owning PF.
 */
void ice_print_vf_rx_mdd_event(struct ice_vf *vf)
{
	struct ice_pf *pf = vf->pf;
	const char *auto_reset;

	/* spell out the auto-reset flag state for the log line */
	if (test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags))
		auto_reset = "on";
	else
		auto_reset = "off";

	dev_info(ice_pf_to_dev(pf),
		 "%d Rx Malicious Driver Detection events detected on PF %d VF %d MAC %pM. mdd-auto-reset-vfs=%s\n",
		 vf->mdd_rx_events.count, pf->hw.pf_id, vf->vf_id,
		 vf->dflt_lan_addr.addr, auto_reset);
}
/**
 * ice_print_vfs_mdd_events - print VFs malicious driver detect event
 * @pf: pointer to the PF structure
 *
 * Called from ice_handle_mdd_event to rate limit and print VFs MDD events.
 * For every VF whose Rx or Tx MDD event count differs from the count that was
 * last printed, the new count is recorded as printed and a message is logged.
 */
void ice_print_vfs_mdd_events(struct ice_pf *pf)
{
	struct device *dev = ice_pf_to_dev(pf);
	int vf_idx;

	/* Nothing to do unless a print is pending (the pending bit is consumed
	 * by this test), and even then at most one batch of messages is
	 * emitted per second.
	 */
	if (!test_and_clear_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state) ||
	    time_is_after_jiffies(pf->last_printed_mdd_jiffies + HZ))
		return;

	pf->last_printed_mdd_jiffies = jiffies;

	ice_for_each_vf(pf, vf_idx) {
		struct ice_vf *vf = &pf->vf[vf_idx];

		/* Rx: report only when the count moved since the last print */
		if (vf->mdd_rx_events.last_printed != vf->mdd_rx_events.count) {
			vf->mdd_rx_events.last_printed =
				vf->mdd_rx_events.count;
			ice_print_vf_rx_mdd_event(vf);
		}

		/* Tx: report only when the count moved since the last print */
		if (vf->mdd_tx_events.last_printed != vf->mdd_tx_events.count) {
			vf->mdd_tx_events.last_printed =
				vf->mdd_tx_events.count;
			dev_info(dev,
				 "%d Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pM.\n",
				 vf->mdd_tx_events.count, pf->hw.pf_id, vf_idx,
				 vf->dflt_lan_addr.addr);
		}
	}
}