sfc: de-indirect TSO handling
Remove the tx_queue->handle_tso function pointer, and just use tx_queue->tso_version to decide which function to call, thus removing an indirect call from the fast path.
Instead of passing a tso_v2 flag to efx_mcdi_tx_init(), set the desired tx_queue->tso_version before calling it.
In efx_mcdi_tx_init(), report back failure to obtain a TSOv2 context by setting tx_queue->tso_version to 0, which will cause the TX path to use the GSO-based fallback.
Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
committed by
David S. Miller
parent
fe0c4060c1
commit
1679c72cf4
@@ -2175,8 +2175,7 @@ static inline void efx_ef10_push_tx_desc(struct efx_tx_queue *tx_queue,
|
|||||||
|
|
||||||
/* Add Firmware-Assisted TSO v2 option descriptors to a queue.
|
/* Add Firmware-Assisted TSO v2 option descriptors to a queue.
|
||||||
*/
|
*/
|
||||||
static int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue,
|
int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
|
||||||
struct sk_buff *skb,
|
|
||||||
bool *data_mapped)
|
bool *data_mapped)
|
||||||
{
|
{
|
||||||
struct efx_tx_buffer *buffer;
|
struct efx_tx_buffer *buffer;
|
||||||
@@ -2266,7 +2265,6 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
|
|||||||
struct efx_channel *channel = tx_queue->channel;
|
struct efx_channel *channel = tx_queue->channel;
|
||||||
struct efx_nic *efx = tx_queue->efx;
|
struct efx_nic *efx = tx_queue->efx;
|
||||||
struct efx_ef10_nic_data *nic_data;
|
struct efx_ef10_nic_data *nic_data;
|
||||||
bool tso_v2 = false;
|
|
||||||
efx_qword_t *txd;
|
efx_qword_t *txd;
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
@@ -2289,15 +2287,18 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
|
|||||||
* TSOv2 cannot be used with Hardware timestamping, and is never needed
|
* TSOv2 cannot be used with Hardware timestamping, and is never needed
|
||||||
* for XDP tx.
|
* for XDP tx.
|
||||||
*/
|
*/
|
||||||
if ((csum_offload || inner_csum) && (nic_data->datapath_caps2 &
|
if (efx_has_cap(efx, TX_TSO_V2)) {
|
||||||
(1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN)) &&
|
if ((csum_offload || inner_csum) &&
|
||||||
!tx_queue->timestamping && !tx_queue->xdp_tx) {
|
!tx_queue->timestamping && !tx_queue->xdp_tx) {
|
||||||
tso_v2 = true;
|
tx_queue->tso_version = 2;
|
||||||
netif_dbg(efx, hw, efx->net_dev, "Using TSOv2 for channel %u\n",
|
netif_dbg(efx, hw, efx->net_dev, "Using TSOv2 for channel %u\n",
|
||||||
channel->channel);
|
channel->channel);
|
||||||
}
|
}
|
||||||
|
} else if (efx_has_cap(efx, TX_TSO)) {
|
||||||
|
tx_queue->tso_version = 1;
|
||||||
|
}
|
||||||
|
|
||||||
rc = efx_mcdi_tx_init(tx_queue, tso_v2);
|
rc = efx_mcdi_tx_init(tx_queue);
|
||||||
if (rc)
|
if (rc)
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
@@ -2315,20 +2316,12 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
|
|||||||
ESF_DZ_TX_OPTION_TYPE,
|
ESF_DZ_TX_OPTION_TYPE,
|
||||||
ESE_DZ_TX_OPTION_DESC_CRC_CSUM,
|
ESE_DZ_TX_OPTION_DESC_CRC_CSUM,
|
||||||
ESF_DZ_TX_OPTION_UDP_TCP_CSUM, csum_offload,
|
ESF_DZ_TX_OPTION_UDP_TCP_CSUM, csum_offload,
|
||||||
ESF_DZ_TX_OPTION_IP_CSUM, csum_offload && !tso_v2,
|
ESF_DZ_TX_OPTION_IP_CSUM, csum_offload && tx_queue->tso_version != 2,
|
||||||
ESF_DZ_TX_OPTION_INNER_UDP_TCP_CSUM, inner_csum,
|
ESF_DZ_TX_OPTION_INNER_UDP_TCP_CSUM, inner_csum,
|
||||||
ESF_DZ_TX_OPTION_INNER_IP_CSUM, inner_csum && !tso_v2,
|
ESF_DZ_TX_OPTION_INNER_IP_CSUM, inner_csum && tx_queue->tso_version != 2,
|
||||||
ESF_DZ_TX_TIMESTAMP, tx_queue->timestamping);
|
ESF_DZ_TX_TIMESTAMP, tx_queue->timestamping);
|
||||||
tx_queue->write_count = 1;
|
tx_queue->write_count = 1;
|
||||||
|
|
||||||
if (tso_v2) {
|
|
||||||
tx_queue->handle_tso = efx_ef10_tx_tso_desc;
|
|
||||||
tx_queue->tso_version = 2;
|
|
||||||
} else if (nic_data->datapath_caps &
|
|
||||||
(1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN)) {
|
|
||||||
tx_queue->tso_version = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
wmb();
|
wmb();
|
||||||
efx_ef10_push_tx_desc(tx_queue, txd);
|
efx_ef10_push_tx_desc(tx_queue, txd);
|
||||||
|
|
||||||
|
|||||||
@@ -37,7 +37,14 @@ void ef100_tx_init(struct efx_tx_queue *tx_queue)
|
|||||||
tx_queue->channel->channel -
|
tx_queue->channel->channel -
|
||||||
tx_queue->efx->tx_channel_offset);
|
tx_queue->efx->tx_channel_offset);
|
||||||
|
|
||||||
if (efx_mcdi_tx_init(tx_queue, false))
|
/* This value is purely documentational; as EF100 never passes through
|
||||||
|
* the switch statement in tx.c:__efx_enqueue_skb(), that switch does
|
||||||
|
* not handle case 3. EF100's TSOv3 descriptors are generated by
|
||||||
|
* ef100_make_tso_desc().
|
||||||
|
* Meanwhile, all efx_mcdi_tx_init() cares about is that it's not 2.
|
||||||
|
*/
|
||||||
|
tx_queue->tso_version = 3;
|
||||||
|
if (efx_mcdi_tx_init(tx_queue))
|
||||||
netdev_WARN(tx_queue->efx->net_dev,
|
netdev_WARN(tx_queue->efx->net_dev,
|
||||||
"failed to initialise TXQ %d\n", tx_queue->queue);
|
"failed to initialise TXQ %d\n", tx_queue->queue);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -415,6 +415,8 @@ void efx_farch_tx_init(struct efx_tx_queue *tx_queue)
|
|||||||
FFE_BZ_TX_PACE_OFF :
|
FFE_BZ_TX_PACE_OFF :
|
||||||
FFE_BZ_TX_PACE_RESERVED);
|
FFE_BZ_TX_PACE_RESERVED);
|
||||||
efx_writeo_table(efx, &reg, FR_BZ_TX_PACE_TBL, tx_queue->queue);
|
efx_writeo_table(efx, &reg, FR_BZ_TX_PACE_TBL, tx_queue->queue);
|
||||||
|
|
||||||
|
tx_queue->tso_version = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void efx_farch_flush_tx_queue(struct efx_tx_queue *tx_queue)
|
static void efx_farch_flush_tx_queue(struct efx_tx_queue *tx_queue)
|
||||||
|
|||||||
@@ -160,7 +160,7 @@ fail:
|
|||||||
outbuf, outlen, rc);
|
outbuf, outlen, rc);
|
||||||
}
|
}
|
||||||
|
|
||||||
int efx_mcdi_tx_init(struct efx_tx_queue *tx_queue, bool tso_v2)
|
int efx_mcdi_tx_init(struct efx_tx_queue *tx_queue)
|
||||||
{
|
{
|
||||||
MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_TXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 /
|
MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_TXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 /
|
||||||
EFX_BUF_SIZE));
|
EFX_BUF_SIZE));
|
||||||
@@ -195,6 +195,8 @@ int efx_mcdi_tx_init(struct efx_tx_queue *tx_queue, bool tso_v2)
|
|||||||
inlen = MC_CMD_INIT_TXQ_IN_LEN(entries);
|
inlen = MC_CMD_INIT_TXQ_IN_LEN(entries);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
|
bool tso_v2 = tx_queue->tso_version == 2;
|
||||||
|
|
||||||
/* TSOv2 implies IP header checksum offload for TSO frames,
|
/* TSOv2 implies IP header checksum offload for TSO frames,
|
||||||
* so we can safely disable IP header checksum offload for
|
* so we can safely disable IP header checksum offload for
|
||||||
* everything else. If we don't have TSOv2, then we have to
|
* everything else. If we don't have TSOv2, then we have to
|
||||||
@@ -217,7 +219,7 @@ int efx_mcdi_tx_init(struct efx_tx_queue *tx_queue, bool tso_v2)
|
|||||||
NULL, 0, NULL);
|
NULL, 0, NULL);
|
||||||
if (rc == -ENOSPC && tso_v2) {
|
if (rc == -ENOSPC && tso_v2) {
|
||||||
/* Retry without TSOv2 if we're short on contexts. */
|
/* Retry without TSOv2 if we're short on contexts. */
|
||||||
tso_v2 = false;
|
tx_queue->tso_version = 0;
|
||||||
netif_warn(efx, probe, efx->net_dev,
|
netif_warn(efx, probe, efx->net_dev,
|
||||||
"TSOv2 context not available to segment in "
|
"TSOv2 context not available to segment in "
|
||||||
"hardware. TCP performance may be reduced.\n"
|
"hardware. TCP performance may be reduced.\n"
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ int efx_mcdi_ev_probe(struct efx_channel *channel);
|
|||||||
int efx_mcdi_ev_init(struct efx_channel *channel, bool v1_cut_thru, bool v2);
|
int efx_mcdi_ev_init(struct efx_channel *channel, bool v1_cut_thru, bool v2);
|
||||||
void efx_mcdi_ev_remove(struct efx_channel *channel);
|
void efx_mcdi_ev_remove(struct efx_channel *channel);
|
||||||
void efx_mcdi_ev_fini(struct efx_channel *channel);
|
void efx_mcdi_ev_fini(struct efx_channel *channel);
|
||||||
int efx_mcdi_tx_init(struct efx_tx_queue *tx_queue, bool tso_v2);
|
int efx_mcdi_tx_init(struct efx_tx_queue *tx_queue);
|
||||||
void efx_mcdi_tx_remove(struct efx_tx_queue *tx_queue);
|
void efx_mcdi_tx_remove(struct efx_tx_queue *tx_queue);
|
||||||
void efx_mcdi_tx_fini(struct efx_tx_queue *tx_queue);
|
void efx_mcdi_tx_fini(struct efx_tx_queue *tx_queue);
|
||||||
int efx_mcdi_rx_probe(struct efx_rx_queue *rx_queue);
|
int efx_mcdi_rx_probe(struct efx_rx_queue *rx_queue);
|
||||||
|
|||||||
@@ -208,8 +208,6 @@ struct efx_tx_buffer {
|
|||||||
* @initialised: Has hardware queue been initialised?
|
* @initialised: Has hardware queue been initialised?
|
||||||
* @timestamping: Is timestamping enabled for this channel?
|
* @timestamping: Is timestamping enabled for this channel?
|
||||||
* @xdp_tx: Is this an XDP tx queue?
|
* @xdp_tx: Is this an XDP tx queue?
|
||||||
* @handle_tso: TSO xmit preparation handler. Sets up the TSO metadata and
|
|
||||||
* may also map tx data, depending on the nature of the TSO implementation.
|
|
||||||
* @read_count: Current read pointer.
|
* @read_count: Current read pointer.
|
||||||
* This is the number of buffers that have been removed from both rings.
|
* This is the number of buffers that have been removed from both rings.
|
||||||
* @old_write_count: The value of @write_count when last checked.
|
* @old_write_count: The value of @write_count when last checked.
|
||||||
@@ -272,9 +270,6 @@ struct efx_tx_queue {
|
|||||||
bool timestamping;
|
bool timestamping;
|
||||||
bool xdp_tx;
|
bool xdp_tx;
|
||||||
|
|
||||||
/* Function pointers used in the fast path. */
|
|
||||||
int (*handle_tso)(struct efx_tx_queue*, struct sk_buff*, bool *);
|
|
||||||
|
|
||||||
/* Members used mainly on the completion path */
|
/* Members used mainly on the completion path */
|
||||||
unsigned int read_count ____cacheline_aligned_in_smp;
|
unsigned int read_count ____cacheline_aligned_in_smp;
|
||||||
unsigned int old_write_count;
|
unsigned int old_write_count;
|
||||||
|
|||||||
@@ -297,6 +297,10 @@ struct efx_ef10_nic_data {
|
|||||||
u64 licensed_features;
|
u64 licensed_features;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* TSOv2 */
|
||||||
|
int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
|
||||||
|
bool *data_mapped);
|
||||||
|
|
||||||
int efx_init_sriov(void);
|
int efx_init_sriov(void);
|
||||||
void efx_fini_sriov(void);
|
void efx_fini_sriov(void);
|
||||||
|
|
||||||
|
|||||||
@@ -338,8 +338,18 @@ netdev_tx_t __efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb
|
|||||||
* size limit.
|
* size limit.
|
||||||
*/
|
*/
|
||||||
if (segments) {
|
if (segments) {
|
||||||
EFX_WARN_ON_ONCE_PARANOID(!tx_queue->handle_tso);
|
switch (tx_queue->tso_version) {
|
||||||
rc = tx_queue->handle_tso(tx_queue, skb, &data_mapped);
|
case 1:
|
||||||
|
rc = efx_enqueue_skb_tso(tx_queue, skb, &data_mapped);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
rc = efx_ef10_tx_tso_desc(tx_queue, skb, &data_mapped);
|
||||||
|
break;
|
||||||
|
case 0: /* No TSO on this queue, SW fallback needed */
|
||||||
|
default:
|
||||||
|
rc = -EINVAL;
|
||||||
|
break;
|
||||||
|
}
|
||||||
if (rc == -EINVAL) {
|
if (rc == -EINVAL) {
|
||||||
rc = efx_tx_tso_fallback(tx_queue, skb);
|
rc = efx_tx_tso_fallback(tx_queue, skb);
|
||||||
tx_queue->tso_fallbacks++;
|
tx_queue->tso_fallbacks++;
|
||||||
|
|||||||
@@ -86,11 +86,7 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
|
|||||||
tx_queue->completed_timestamp_minor = 0;
|
tx_queue->completed_timestamp_minor = 0;
|
||||||
|
|
||||||
tx_queue->xdp_tx = efx_channel_is_xdp_tx(tx_queue->channel);
|
tx_queue->xdp_tx = efx_channel_is_xdp_tx(tx_queue->channel);
|
||||||
|
tx_queue->tso_version = 0;
|
||||||
/* Set up default function pointers. These may get replaced by
|
|
||||||
* efx_nic_init_tx() based off NIC/queue capabilities.
|
|
||||||
*/
|
|
||||||
tx_queue->handle_tso = efx_enqueue_skb_tso;
|
|
||||||
|
|
||||||
/* Set up TX descriptor ring */
|
/* Set up TX descriptor ring */
|
||||||
efx_nic_init_tx(tx_queue);
|
efx_nic_init_tx(tx_queue);
|
||||||
|
|||||||
Reference in New Issue
Block a user