From 61a4427b955f79dfaa735788511ce558962c9d70 Mon Sep 17 00:00:00 2001
From: Nimrod Andy
Date: Thu, 12 Jun 2014 08:16:18 +0800
Subject: [PATCH 1/6] net: fec: Factorize the .xmit transmit function

Make the code more readable and easier to extend with other features
such as SG and TSO by moving the common transmit code into one
function. The patch also factors out the BD index calculation into
its own function.

CC: David Laight
Signed-off-by: Fugang Duan
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/freescale/fec.h      |  1 +
 drivers/net/ethernet/freescale/fec_main.c | 81 +++++++++++++----------
 2 files changed, 47 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 3b8d6d19ff05..db967a08637d 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -296,6 +296,7 @@ struct fec_enet_private {
     /* The ring entries to be free()ed */
     struct bufdesc *dirty_tx;

+    unsigned short bufdesc_size;
     unsigned short tx_ring_size;
     unsigned short rx_ring_size;

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 802be17285b6..ee8e5477c819 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -287,6 +287,12 @@ struct bufdesc *fec_enet_get_prevdesc(struct bufdesc *bdp, struct fec_enet_private
     return (new_bd < base) ? (new_bd + ring_size) : new_bd;
 }

+static int fec_enet_get_bd_index(struct bufdesc *base, struct bufdesc *bdp,
+                                 struct fec_enet_private *fep)
+{
+    return ((const char *)bdp - (const char *)base) / fep->bufdesc_size;
+}
+
 static void *swap_buffer(void *bufaddr, int len)
 {
     int i;
@@ -313,8 +319,7 @@ fec_enet_clear_csum(struct sk_buff *skb, struct net_device *ndev)
     return 0;
 }

-static netdev_tx_t
-fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+static int txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
 {
     struct fec_enet_private *fep = netdev_priv(ndev);
     const struct platform_device_id *id_entry =
@@ -329,14 +334,6 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)

     status = bdp->cbd_sc;

-    if (status & BD_ENET_TX_READY) {
-        /* Ooops.  All transmit buffers are full.  Bail out.
-         * This should not happen, since ndev->tbusy should be set.
-         */
-        netdev_err(ndev, "tx queue full!\n");
-        return NETDEV_TX_BUSY;
-    }
-
     /* Protocol checksum off-load for TCP and UDP. */
     if (fec_enet_clear_csum(skb, ndev)) {
         dev_kfree_skb_any(skb);
@@ -350,16 +347,7 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
     bufaddr = skb->data;
     bdp->cbd_datlen = skb->len;

-    /*
-     * On some FEC implementations data must be aligned on
-     * 4-byte boundaries.  Use bounce buffers to copy data
-     * and get it aligned. Ugh.
-     */
-    if (fep->bufdesc_ex)
-        index = (struct bufdesc_ex *)bdp -
-            (struct bufdesc_ex *)fep->tx_bd_base;
-    else
-        index = bdp - fep->tx_bd_base;
+    index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);

     if (((unsigned long) bufaddr) & FEC_ALIGNMENT) {
         memcpy(fep->tx_bounce[index], skb->data, skb->len);
         bufaddr = fep->tx_bounce[index];
@@ -433,15 +421,43 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)

     fep->cur_tx = bdp;

-    if (fep->cur_tx == fep->dirty_tx)
-        netif_stop_queue(ndev);
-
     /* Trigger transmission start */
     writel(0, fep->hwp + FEC_X_DES_ACTIVE);

     return NETDEV_TX_OK;
 }

+static netdev_tx_t
+fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+    struct fec_enet_private *fep = netdev_priv(ndev);
+    struct bufdesc *bdp;
+    unsigned short status;
+    int ret;
+
+    /* Fill in a Tx ring entry */
+    bdp = fep->cur_tx;
+
+    status = bdp->cbd_sc;
+
+    if (status & BD_ENET_TX_READY) {
+        /* Ooops.  All transmit buffers are full.  Bail out.
+         * This should not happen, since ndev->tbusy should be set.
+         */
+        netdev_err(ndev, "tx queue full!\n");
+        return NETDEV_TX_BUSY;
+    }
+
+    ret = txq_submit_skb(skb, ndev);
+    if (ret == -EBUSY)
+        return NETDEV_TX_BUSY;
+
+    if (fep->cur_tx == fep->dirty_tx)
+        netif_stop_queue(ndev);
+
+    return NETDEV_TX_OK;
+}
+
 /* Init RX & TX buffer descriptors */
 static void fec_enet_bd_init(struct net_device *dev)
@@ -770,11 +786,7 @@ fec_enet_tx(struct net_device *ndev)
         if (bdp == fep->cur_tx)
             break;

-        if (fep->bufdesc_ex)
-            index = (struct bufdesc_ex *)bdp -
-                (struct bufdesc_ex *)fep->tx_bd_base;
-        else
-            index = bdp - fep->tx_bd_base;
+        index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);

         skb = fep->tx_skbuff[index];
         dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, skb->len,
@@ -921,11 +933,7 @@ fec_enet_rx(struct net_device *ndev, int budget)
         pkt_len = bdp->cbd_datlen;
         ndev->stats.rx_bytes += pkt_len;

-        if (fep->bufdesc_ex)
-            index = (struct bufdesc_ex *)bdp -
-                (struct bufdesc_ex *)fep->rx_bd_base;
-        else
-            index = bdp - fep->rx_bd_base;
+        index = fec_enet_get_bd_index(fep->rx_bd_base, bdp, fep);

         data = fep->rx_skbuff[index]->data;
         dma_sync_single_for_cpu(&fep->pdev->dev, bdp->cbd_bufaddr,
                     FEC_ENET_RX_FRSIZE, DMA_FROM_DEVICE);
@@ -2061,11 +2069,14 @@ static int fec_enet_init(struct net_device *ndev)

     /* Set receive and transmit descriptor base. */
     fep->rx_bd_base = cbd_base;
-    if (fep->bufdesc_ex)
+    if (fep->bufdesc_ex) {
         fep->tx_bd_base = (struct bufdesc *)
             (((struct bufdesc_ex *)cbd_base) + fep->rx_ring_size);
-    else
+        fep->bufdesc_size = sizeof(struct bufdesc_ex);
+    } else {
         fep->tx_bd_base = cbd_base + fep->rx_ring_size;
+        fep->bufdesc_size = sizeof(struct bufdesc);
+    }

     /* The FEC Ethernet specific entries in the device structure */
     ndev->watchdog_timeo = TX_TIMEOUT;
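Aside (not part of the patch): the new fec_enet_get_bd_index() works for
both the legacy and extended descriptor layouts because it divides the
raw byte offset by the per-descriptor stride cached in fep->bufdesc_size.
A minimal standalone sketch of the idea (struct layouts abbreviated, not
the driver's exact definitions):

    #include <stdio.h>
    #include <stddef.h>

    struct bufdesc { unsigned short sc, datlen; unsigned int bufaddr; };
    struct bufdesc_ex { struct bufdesc d; unsigned int esc, prot, bdu, ts; };

    /* Byte offset divided by stride gives the ring index for any layout. */
    static int get_bd_index(const void *base, const void *bdp, size_t stride)
    {
        return (int)(((const char *)bdp - (const char *)base) / stride);
    }

    int main(void)
    {
        struct bufdesc_ex ring[8];

        /* &ring[5] maps back to index 5 with the extended stride. */
        printf("%d\n", get_bd_index(ring, &ring[5], sizeof(ring[0])));
        return 0;
    }
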
From 96c50caa5148e0e0a077672574785700885c6764 Mon Sep 17 00:00:00 2001
From: Nimrod Andy
Date: Thu, 12 Jun 2014 08:16:19 +0800
Subject: [PATCH 2/6] net: fec: Enable IP header hardware checksum

The IP header checksum is calculated by the network layer by default.
To support software TSO, it is better to have the HW calculate the IP
header checksum.

The FEC hw checksum feature requires the checksum field in the frame
to be zero, otherwise the calculated checksum is not correct. For a
segmented TCP packet, the HW calculates the IP header checksum again,
which does not have any impact. For SW TSO, the HW-calculated checksum
brings better performance.

Signed-off-by: Fugang Duan
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/freescale/fec_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index ee8e5477c819..55b55d2f73bc 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -314,6 +314,7 @@ fec_enet_clear_csum(struct sk_buff *skb, struct net_device *ndev)
     if (unlikely(skb_cow_head(skb, 0)))
         return -1;

+    ip_hdr(skb)->check = 0;
     *(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) = 0;

     return 0;
@@ -395,7 +396,7 @@ static int txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
          * are done by the kernel
          */
         if (skb->ip_summed == CHECKSUM_PARTIAL)
-            ebdp->cbd_esc |= BD_ENET_TX_PINS;
+            ebdp->cbd_esc |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
     }
 }
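Aside (not part of the patch): clearing ip_hdr(skb)->check before
setting BD_ENET_TX_IINS follows from how the IPv4 header checksum is
defined: it is computed over the header with the checksum field taken
as zero, so the hardware can only insert a correct value if that field
is pre-zeroed. A user-space illustration of the rule (standalone
sketch, not the kernel's ip_fast_csum()):

    #include <stdint.h>
    #include <stddef.h>
    #include <stdio.h>

    /* One's-complement sum over 16-bit words; the check field in hdr[]
     * must already be zero, exactly as the hardware expects. */
    static uint16_t ipv4_csum_sketch(const uint16_t *hdr, size_t words)
    {
        uint32_t sum = 0;

        while (words--)
            sum += *hdr++;
        while (sum >> 16)
            sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)~sum;
    }

    int main(void)
    {
        /* 20-byte IPv4 header with the check field (word 5) zeroed;
         * the values are illustrative only. */
        uint16_t hdr[10] = { 0x4500, 0x0054, 0x1c46, 0x4000,
                             0x4001, 0x0000, 0xac10, 0x0a63,
                             0xac10, 0x0a0c };

        printf("check = 0x%04x\n", ipv4_csum_sketch(hdr, 10));
        return 0;
    }
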
From 09d1e541fd906861805446af4bb6236a0d2cf09b Mon Sep 17 00:00:00 2001
From: Nimrod Andy
Date: Thu, 12 Jun 2014 08:16:20 +0800
Subject: [PATCH 3/6] net: fec: Factorize feature setting

In order to make the code more readable, let's factorize the feature
list.

Signed-off-by: Fugang Duan
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/freescale/fec_main.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 55b55d2f73bc..f943cd142ef7 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -2087,21 +2087,19 @@ static int fec_enet_init(struct net_device *ndev)
     writel(FEC_RX_DISABLED_IMASK, fep->hwp + FEC_IMASK);
     netif_napi_add(ndev, &fep->napi, fec_enet_rx_napi, NAPI_POLL_WEIGHT);

-    if (id_entry->driver_data & FEC_QUIRK_HAS_VLAN) {
+    if (id_entry->driver_data & FEC_QUIRK_HAS_VLAN)
         /* enable hw VLAN support */
         ndev->features |= NETIF_F_HW_VLAN_CTAG_RX;
-        ndev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
-    }

     if (id_entry->driver_data & FEC_QUIRK_HAS_CSUM) {
         /* enable hw accelerator */
         ndev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
                 | NETIF_F_RXCSUM);
-        ndev->hw_features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
-                | NETIF_F_RXCSUM);
         fep->csum_flags |= FLAG_RX_CSUM_ENABLED;
     }

+    ndev->hw_features = ndev->features;
+
     fec_restart(ndev, 0);

     return 0;
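Aside (not part of the patch): ndev->features is the currently enabled
set, while ndev->hw_features is the set the user may toggle with
ethtool -K; assigning hw_features = features once, after all quirks
have been applied, replaces the duplicated per-branch updates. A
standalone model of that pattern (illustrative flag values only):

    #include <stdint.h>
    #include <stdio.h>

    #define F_HW_VLAN_RX (1u << 0)  /* stands in for NETIF_F_HW_VLAN_CTAG_RX */
    #define F_IP_CSUM    (1u << 1)  /* stands in for NETIF_F_IP_CSUM */
    #define F_RXCSUM     (1u << 2)  /* stands in for NETIF_F_RXCSUM */

    int main(void)
    {
        int has_vlan = 1, has_csum = 1;   /* per-SoC quirk bits */
        uint32_t features = 0, hw_features;

        if (has_vlan)
            features |= F_HW_VLAN_RX;
        if (has_csum)
            features |= F_IP_CSUM | F_RXCSUM;

        /* One assignment exposes everything enabled above as toggleable. */
        hw_features = features;

        printf("features=0x%x hw_features=0x%x\n", features, hw_features);
        return 0;
    }
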
From 55d0218ae2e23eb8a4da1f277eba53ba4edb9a26 Mon Sep 17 00:00:00 2001
From: Nimrod Andy
Date: Thu, 12 Jun 2014 08:16:21 +0800
Subject: [PATCH 4/6] net: fec: Increase buffer descriptor entry number

In order to support SG and software TSO, let's increase the BD entry
number.

CC: Ezequiel Garcia
CC: Eric Dumazet
CC: David Laight
Signed-off-by: Fugang Duan
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/freescale/fec.h      |  4 ++--
 drivers/net/ethernet/freescale/fec_main.c | 29 ++++++++++++-----------
 2 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index db967a08637d..5ffd32308b9c 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -246,8 +246,8 @@ struct bufdesc_ex {
 #define RX_RING_SIZE    (FEC_ENET_RX_FRPPG * FEC_ENET_RX_PAGES)
 #define FEC_ENET_TX_FRSIZE  2048
 #define FEC_ENET_TX_FRPPG   (PAGE_SIZE / FEC_ENET_TX_FRSIZE)
-#define TX_RING_SIZE        16  /* Must be power of two */
-#define TX_RING_MOD_MASK    15  /*   for this to work */
+#define TX_RING_SIZE        512 /* Must be power of two */
+#define TX_RING_MOD_MASK    511 /*   for this to work */

 #define BD_ENET_RX_INT      0x00800000
 #define BD_ENET_RX_PTP      ((ushort)0x0400)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index f943cd142ef7..b27a729222b8 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -173,10 +173,6 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
 #endif
 #endif /* CONFIG_M5272 */

-#if (((RX_RING_SIZE + TX_RING_SIZE) * 32) > PAGE_SIZE)
-#error "FEC: descriptor ring size constants too large"
-#endif
-
 /* Interrupt events/masks. */
 #define FEC_ENET_HBERR  ((uint)0x80000000)  /* Heartbeat error */
 #define FEC_ENET_BABR   ((uint)0x40000000)  /* Babbling receiver */
@@ -2048,9 +2044,21 @@ static int fec_enet_init(struct net_device *ndev)
     const struct platform_device_id *id_entry =
                 platform_get_device_id(fep->pdev);
     struct bufdesc *cbd_base;
+    int bd_size;
+
+    /* init the tx & rx ring size */
+    fep->tx_ring_size = TX_RING_SIZE;
+    fep->rx_ring_size = RX_RING_SIZE;
+
+    if (fep->bufdesc_ex)
+        fep->bufdesc_size = sizeof(struct bufdesc_ex);
+    else
+        fep->bufdesc_size = sizeof(struct bufdesc);
+    bd_size = (fep->tx_ring_size + fep->rx_ring_size) *
+            fep->bufdesc_size;

     /* Allocate memory for buffer descriptors. */
-    cbd_base = dma_alloc_coherent(NULL, PAGE_SIZE, &fep->bd_dma,
+    cbd_base = dma_alloc_coherent(NULL, bd_size, &fep->bd_dma,
                       GFP_KERNEL);
     if (!cbd_base)
         return -ENOMEM;
@@ -2064,20 +2072,13 @@ static int fec_enet_init(struct net_device *ndev)
     /* make sure MAC we just acquired is programmed into the hw */
     fec_set_mac_address(ndev, NULL);

-    /* init the tx & rx ring size */
-    fep->tx_ring_size = TX_RING_SIZE;
-    fep->rx_ring_size = RX_RING_SIZE;
-
     /* Set receive and transmit descriptor base. */
     fep->rx_bd_base = cbd_base;
-    if (fep->bufdesc_ex) {
+    if (fep->bufdesc_ex)
         fep->tx_bd_base = (struct bufdesc *)
             (((struct bufdesc_ex *)cbd_base) + fep->rx_ring_size);
-        fep->bufdesc_size = sizeof(struct bufdesc_ex);
-    } else {
+    else
         fep->tx_bd_base = cbd_base + fep->rx_ring_size;
-        fep->bufdesc_size = sizeof(struct bufdesc);
-    }

     /* The FEC Ethernet specific entries in the device structure */
     ndev->watchdog_timeo = TX_TIMEOUT;
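Aside (not part of the patch): with 512 TX entries the descriptor rings
no longer fit in one page, which is why the compile-time PAGE_SIZE
check is dropped and the DMA allocation is sized as
(tx + rx) * bufdesc_size. A quick standalone model of the arithmetic
(the 32-byte extended descriptor size and the RX ring value are
assumptions for illustration):

    #include <stdio.h>
    #include <stddef.h>

    int main(void)
    {
        size_t tx_ring_size = 512;
        size_t rx_ring_size = 64;   /* assumed; really FEC_ENET_RX_FRPPG *
                                     * FEC_ENET_RX_PAGES */
        size_t bufdesc_size = 32;   /* assumed sizeof(struct bufdesc_ex) */
        size_t bd_size = (tx_ring_size + rx_ring_size) * bufdesc_size;

        /* 576 descriptors * 32 bytes = 18432 bytes, i.e. several 4 KiB
         * pages, so dma_alloc_coherent() must be sized explicitly. */
        printf("%zu bytes\n", bd_size);
        return 0;
    }
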
From 6e909283cb344e32aa8adb4a4c169512d8e5fd27 Mon Sep 17 00:00:00 2001
From: Nimrod Andy
Date: Thu, 12 Jun 2014 08:16:22 +0800
Subject: [PATCH 5/6] net: fec: Add Scatter/gather support

Add scatter/gather support for FEC.
This feature improves outbound throughput performance.

Tested on an imx6dl sabresd board; running iperf tests shows a 55.4%
improvement:

$ ethtool -K eth0 sg off
$ iperf -c 10.192.242.167 -t 3 &
[  3] local 10.192.242.108 port 52618 connected with 10.192.242.167 port 5001
[ ID] Interval       Transfer     Bandwidth
[  3]  0.0- 3.0 sec  99.5 MBytes   278 Mbits/sec

$ ethtool -K eth0 sg on
$ iperf -c 10.192.242.167 -t 3 &
[  3] local 10.192.242.108 port 52617 connected with 10.192.242.167 port 5001
[ ID] Interval       Transfer     Bandwidth
[  3]  0.0- 3.0 sec   154 MBytes   432 Mbits/sec

CC: Li Frank
Signed-off-by: Fugang Duan
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/freescale/fec.h      |   2 +-
 drivers/net/ethernet/freescale/fec_main.c | 312 +++++++++++++++-------
 2 files changed, 215 insertions(+), 99 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 5ffd32308b9c..e7ce14d8d3c3 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -221,7 +221,7 @@ struct bufdesc_ex {
 #define BD_ENET_TX_RCMASK   ((ushort)0x003c)
 #define BD_ENET_TX_UN       ((ushort)0x0002)
 #define BD_ENET_TX_CSL      ((ushort)0x0001)
-#define BD_ENET_TX_STATS    ((ushort)0x03ff)    /* All status bits */
+#define BD_ENET_TX_STATS    ((ushort)0x0fff)    /* All status bits */

 /*enhanced buffer descriptor control/status used by Ethernet transmit*/
 #define BD_ENET_TX_INT      0x40000000

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index b27a729222b8..bea00a8d6c99 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -289,6 +289,16 @@ static int fec_enet_get_bd_index(struct bufdesc *base, struct bufdesc *bdp,
     return ((const char *)bdp - (const char *)base) / fep->bufdesc_size;
 }

+static int fec_enet_get_free_txdesc_num(struct fec_enet_private *fep)
+{
+    int entries;
+
+    entries = ((const char *)fep->dirty_tx -
+            (const char *)fep->cur_tx) / fep->bufdesc_size - 1;
+
+    return entries > 0 ? entries : entries + fep->tx_ring_size;
+}
+
 static void *swap_buffer(void *bufaddr, int len)
 {
     int i;
@@ -316,92 +326,12 @@ fec_enet_clear_csum(struct sk_buff *skb, struct net_device *ndev)
     return 0;
 }

-static int txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
+static void
+fec_enet_submit_work(struct bufdesc *bdp, struct fec_enet_private *fep)
 {
-    struct fec_enet_private *fep = netdev_priv(ndev);
     const struct platform_device_id *id_entry =
                 platform_get_device_id(fep->pdev);
-    struct bufdesc *bdp, *bdp_pre;
-    void *bufaddr;
-    unsigned short status;
-    unsigned int index;
-
-    /* Fill in a Tx ring entry */
-    bdp = fep->cur_tx;
-
-    status = bdp->cbd_sc;
-
-    /* Protocol checksum off-load for TCP and UDP. */
-    if (fec_enet_clear_csum(skb, ndev)) {
-        dev_kfree_skb_any(skb);
-        return NETDEV_TX_OK;
-    }
-
-    /* Clear all of the status flags */
-    status &= ~BD_ENET_TX_STATS;
-
-    /* Set buffer length and buffer pointer */
-    bufaddr = skb->data;
-    bdp->cbd_datlen = skb->len;
-
-    index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
-
-    if (((unsigned long) bufaddr) & FEC_ALIGNMENT) {
-        memcpy(fep->tx_bounce[index], skb->data, skb->len);
-        bufaddr = fep->tx_bounce[index];
-    }
-
-    /*
-     * Some design made an incorrect assumption on endian mode of
-     * the system that it's running on. As the result, driver has to
-     * swap every frame going to and coming from the controller.
-     */
-    if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
-        swap_buffer(bufaddr, skb->len);
-
-    /* Save skb pointer */
-    fep->tx_skbuff[index] = skb;
-
-    /* Push the data cache so the CPM does not get stale memory
-     * data.
-     */
-    bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, bufaddr,
-            skb->len, DMA_TO_DEVICE);
-    if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
-        bdp->cbd_bufaddr = 0;
-        fep->tx_skbuff[index] = NULL;
-        dev_kfree_skb_any(skb);
-        if (net_ratelimit())
-            netdev_err(ndev, "Tx DMA memory map failed\n");
-        return NETDEV_TX_OK;
-    }
-
-    if (fep->bufdesc_ex) {
-
-        struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
-        ebdp->cbd_bdu = 0;
-        if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
-            fep->hwts_tx_en)) {
-            ebdp->cbd_esc = (BD_ENET_TX_TS | BD_ENET_TX_INT);
-            skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
-        } else {
-            ebdp->cbd_esc = BD_ENET_TX_INT;
-
-            /* Enable protocol checksum flags
-             * We do not bother with the IP Checksum bits as they
-             * are done by the kernel
-             */
-            if (skb->ip_summed == CHECKSUM_PARTIAL)
-                ebdp->cbd_esc |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
-        }
-    }
-
-    /* Send it on its way.  Tell FEC it's ready, interrupt when done,
-     * it's the last BD of the frame, and to put the CRC on the end.
-     */
-    status |= (BD_ENET_TX_READY | BD_ENET_TX_INTR
-            | BD_ENET_TX_LAST | BD_ENET_TX_TC);
-    bdp->cbd_sc = status;
+    struct bufdesc *bdp_pre;

     bdp_pre = fec_enet_get_prevdesc(bdp, fep);
     if ((id_entry->driver_data & FEC_QUIRK_ERR006358) &&
@@ -410,9 +340,189 @@ static int txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
         schedule_delayed_work(&(fep->delay_work.delay_work),
                     msecs_to_jiffies(1));
     }
+}
+
+static int
+fec_enet_txq_submit_frag_skb(struct sk_buff *skb, struct net_device *ndev)
+{
+    struct fec_enet_private *fep = netdev_priv(ndev);
+    const struct platform_device_id *id_entry =
+                platform_get_device_id(fep->pdev);
+    struct bufdesc *bdp = fep->cur_tx;
+    struct bufdesc_ex *ebdp;
+    int nr_frags = skb_shinfo(skb)->nr_frags;
+    int frag, frag_len;
+    unsigned short status;
+    unsigned int estatus = 0;
+    skb_frag_t *this_frag;
+    unsigned int index;
+    void *bufaddr;
+    int i;
+
+    for (frag = 0; frag < nr_frags; frag++) {
+        this_frag = &skb_shinfo(skb)->frags[frag];
+        bdp = fec_enet_get_nextdesc(bdp, fep);
+        ebdp = (struct bufdesc_ex *)bdp;
+
+        status = bdp->cbd_sc;
+        status &= ~BD_ENET_TX_STATS;
+        status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
+        frag_len = skb_shinfo(skb)->frags[frag].size;
+
+        /* Handle the last BD specially */
+        if (frag == nr_frags - 1) {
+            status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST);
+            if (fep->bufdesc_ex) {
+                estatus |= BD_ENET_TX_INT;
+                if (unlikely(skb_shinfo(skb)->tx_flags &
+                    SKBTX_HW_TSTAMP && fep->hwts_tx_en))
+                    estatus |= BD_ENET_TX_TS;
+            }
+        }
+
+        if (fep->bufdesc_ex) {
+            if (skb->ip_summed == CHECKSUM_PARTIAL)
+                estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
+            ebdp->cbd_bdu = 0;
+            ebdp->cbd_esc = estatus;
+        }
+
+        bufaddr = page_address(this_frag->page.p) + this_frag->page_offset;
+
+        index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
+        if (((unsigned long) bufaddr) & FEC_ALIGNMENT ||
+            id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
+            memcpy(fep->tx_bounce[index], bufaddr, frag_len);
+            bufaddr = fep->tx_bounce[index];
+
+            if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+                swap_buffer(bufaddr, frag_len);
+        }
+
+        bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, bufaddr,
+                        frag_len, DMA_TO_DEVICE);
+        if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
+            dev_kfree_skb_any(skb);
+            if (net_ratelimit())
+                netdev_err(ndev, "Tx DMA memory map failed\n");
+            goto dma_mapping_error;
+        }
+
+        bdp->cbd_datlen = frag_len;
+        bdp->cbd_sc = status;
+    }
+
+    fep->cur_tx = bdp;
+
+    return 0;
+
+dma_mapping_error:
+    bdp = fep->cur_tx;
+    for (i = 0; i < frag; i++) {
+        bdp = fec_enet_get_nextdesc(bdp, fep);
+        dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
+                bdp->cbd_datlen, DMA_TO_DEVICE);
+    }
+    return NETDEV_TX_OK;
+}
+
+static int fec_enet_txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
+{
+    struct fec_enet_private *fep = netdev_priv(ndev);
+    const struct platform_device_id *id_entry =
+                platform_get_device_id(fep->pdev);
+    int nr_frags = skb_shinfo(skb)->nr_frags;
+    struct bufdesc *bdp, *last_bdp;
+    void *bufaddr;
+    unsigned short status;
+    unsigned short buflen;
+    unsigned int estatus = 0;
+    unsigned int index;
+    int ret;
+
+    /* Protocol checksum off-load for TCP and UDP. */
+    if (fec_enet_clear_csum(skb, ndev)) {
+        dev_kfree_skb_any(skb);
+        return NETDEV_TX_OK;
+    }
+
+    /* Fill in a Tx ring entry */
+    bdp = fep->cur_tx;
+    status = bdp->cbd_sc;
+    status &= ~BD_ENET_TX_STATS;
+
+    /* Set buffer length and buffer pointer */
+    bufaddr = skb->data;
+    buflen = skb_headlen(skb);
+
+    index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
+    if (((unsigned long) bufaddr) & FEC_ALIGNMENT ||
+        id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
+        memcpy(fep->tx_bounce[index], skb->data, buflen);
+        bufaddr = fep->tx_bounce[index];
+
+        if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+            swap_buffer(bufaddr, buflen);
+    }
+
+    /* Push the data cache so the CPM does not get stale memory
+     * data.
+     */
+    bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, bufaddr,
+                    buflen, DMA_TO_DEVICE);
+    if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
+        dev_kfree_skb_any(skb);
+        if (net_ratelimit())
+            netdev_err(ndev, "Tx DMA memory map failed\n");
+        return NETDEV_TX_OK;
+    }
+
+    if (nr_frags) {
+        ret = fec_enet_txq_submit_frag_skb(skb, ndev);
+        if (ret)
+            return ret;
+    } else {
+        status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST);
+        if (fep->bufdesc_ex) {
+            estatus = BD_ENET_TX_INT;
+            if (unlikely(skb_shinfo(skb)->tx_flags &
+                SKBTX_HW_TSTAMP && fep->hwts_tx_en))
+                estatus |= BD_ENET_TX_TS;
+        }
+    }
+
+    if (fep->bufdesc_ex) {
+
+        struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
+
+        if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
+            fep->hwts_tx_en))
+            skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+
+        if (skb->ip_summed == CHECKSUM_PARTIAL)
+            estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
+
+        ebdp->cbd_bdu = 0;
+        ebdp->cbd_esc = estatus;
+    }
+
+    last_bdp = fep->cur_tx;
+    index = fec_enet_get_bd_index(fep->tx_bd_base, last_bdp, fep);
+    /* Save skb pointer */
+    fep->tx_skbuff[index] = skb;
+
+    bdp->cbd_datlen = buflen;
+
+    /* Send it on its way.  Tell FEC it's ready, interrupt when done,
+     * it's the last BD of the frame, and to put the CRC on the end.
+     */
+    status |= (BD_ENET_TX_READY | BD_ENET_TX_TC);
+    bdp->cbd_sc = status;
+
+    fec_enet_submit_work(bdp, fep);

     /* If this was the last BD in the ring, start at the beginning again. */
-    bdp = fec_enet_get_nextdesc(bdp, fep);
+    bdp = fec_enet_get_nextdesc(last_bdp, fep);

     skb_tx_timestamp(skb);

@@ -421,7 +531,7 @@ static int txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
     /* Trigger transmission start */
     writel(0, fep->hwp + FEC_X_DES_ACTIVE);

-    return NETDEV_TX_OK;
+    return 0;
 }

 static netdev_tx_t
@@ -430,6 +540,7 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
     struct fec_enet_private *fep = netdev_priv(ndev);
     struct bufdesc *bdp;
     unsigned short status;
+    int entries_free;
     int ret;

     /* Fill in a Tx ring entry */
@@ -441,15 +552,17 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
         /* Ooops.  All transmit buffers are full.  Bail out.
          * This should not happen, since ndev->tbusy should be set.
          */
-        netdev_err(ndev, "tx queue full!\n");
+        if (net_ratelimit())
+            netdev_err(ndev, "tx queue full!\n");
         return NETDEV_TX_BUSY;
     }

-    ret = txq_submit_skb(skb, ndev);
-    if (ret == -EBUSY)
-        return NETDEV_TX_BUSY;
+    ret = fec_enet_txq_submit_skb(skb, ndev);
+    if (ret)
+        return ret;

-    if (fep->cur_tx == fep->dirty_tx)
+    entries_free = fec_enet_get_free_txdesc_num(fep);
+    if (entries_free < MAX_SKB_FRAGS + 1)
         netif_stop_queue(ndev);

     return NETDEV_TX_OK;
@@ -770,6 +883,7 @@ fec_enet_tx(struct net_device *ndev)
     unsigned short status;
     struct sk_buff *skb;
     int index = 0;
+    int entries;

     fep = netdev_priv(ndev);
     bdp = fep->dirty_tx;
@@ -786,9 +900,13 @@ fec_enet_tx(struct net_device *ndev)
         index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);

         skb = fep->tx_skbuff[index];
-        dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, skb->len,
+        dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, bdp->cbd_datlen,
                 DMA_TO_DEVICE);
         bdp->cbd_bufaddr = 0;
+        if (!skb) {
+            bdp = fec_enet_get_nextdesc(bdp, fep);
+            continue;
+        }

         /* Check for errors. */
         if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC |
@@ -807,7 +925,7 @@ fec_enet_tx(struct net_device *ndev)
             ndev->stats.tx_carrier_errors++;
         } else {
             ndev->stats.tx_packets++;
-            ndev->stats.tx_bytes += bdp->cbd_datlen;
+            ndev->stats.tx_bytes += skb->len;
         }

         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) &&
@@ -844,15 +962,13 @@ fec_enet_tx(struct net_device *ndev)

         /* Since we have freed up a buffer, the ring is no longer
          * full
          */
-        if (fep->dirty_tx != fep->cur_tx) {
-            if (netif_queue_stopped(ndev))
-                netif_wake_queue(ndev);
-        }
+        entries = fec_enet_get_free_txdesc_num(fep);
+        if (entries >= MAX_SKB_FRAGS + 1 && netif_queue_stopped(ndev))
+            netif_wake_queue(ndev);
     }
     return;
 }

-
 /* During a receive, the cur_rx points to the current incoming buffer.
  * When we update through the ring, if the next incoming buffer has
  * not been given to the system, we just set the empty indicator,
@@ -2095,7 +2211,7 @@ static int fec_enet_init(struct net_device *ndev)
     if (id_entry->driver_data & FEC_QUIRK_HAS_CSUM) {
         /* enable hw accelerator */
         ndev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
-                | NETIF_F_RXCSUM);
+                | NETIF_F_RXCSUM | NETIF_F_SG);
         fep->csum_flags |= FLAG_RX_CSUM_ENABLED;
     }
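Aside (not part of the patch): fec_enet_get_free_txdesc_num() measures
how far dirty_tx (the reclaim pointer) trails cur_tx (the submit
pointer), keeping one slot in reserve so the two never alias on a full
ring; the queue is stopped once fewer than MAX_SKB_FRAGS + 1 entries
remain, since a worst-case SG frame needs one descriptor per fragment
plus one for the linear head. A standalone model of the wrap-around
arithmetic:

    #include <stdio.h>

    /* Mirror of the driver's computation, using ring indices instead of
     * descriptor pointers: (dirty - cur) - 1, wrapped into [0, ring). */
    static int free_txdesc(int dirty_tx, int cur_tx, int ring_size)
    {
        int entries = dirty_tx - cur_tx - 1;

        return entries > 0 ? entries : entries + ring_size;
    }

    int main(void)
    {
        printf("%d\n", free_txdesc(0, 0, 512));   /* empty ring: 511 */
        printf("%d\n", free_txdesc(0, 10, 512));  /* 10 in flight: 501 */
        return 0;
    }
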
From 79f339125ea316e910220e5f5b4ad30370f4de85 Mon Sep 17 00:00:00 2001
From: Nimrod Andy
Date: Thu, 12 Jun 2014 08:16:23 +0800
Subject: [PATCH 6/6] net: fec: Add software TSO support

Add software TSO support for FEC.
This feature improves outbound throughput performance.

Tested on an imx6dl sabresd board; running iperf TCP tests shows:
- a 16.2% improvement compared with the FEC SG patch
- an 82% improvement compared with no SG & TSO

$ ethtool -K eth0 tso on
$ iperf -c 10.192.242.167 -t 3 &
[  3] local 10.192.242.108 port 35388 connected with 10.192.242.167 port 5001
[ ID] Interval       Transfer     Bandwidth
[  3]  0.0- 3.0 sec   181 MBytes   506 Mbits/sec

During the testing, the CPU load was 30%. Since the imx6dl FEC
bandwidth is limited by the SoC system bus bandwidth, the performance
with SW TSO is a milestone.

CC: Ezequiel Garcia
CC: Eric Dumazet
CC: David Laight
CC: Li Frank
Signed-off-by: Fugang Duan
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/freescale/fec.h      |   6 +
 drivers/net/ethernet/freescale/fec_main.c | 259 +++++++++++++++++++---
 2 files changed, 240 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index e7ce14d8d3c3..671d080105a7 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -299,6 +299,12 @@ struct fec_enet_private {
     unsigned short bufdesc_size;
     unsigned short tx_ring_size;
     unsigned short rx_ring_size;
+    unsigned short tx_stop_threshold;
+    unsigned short tx_wake_threshold;
+
+    /* Software TSO */
+    char *tso_hdrs;
+    dma_addr_t tso_hdrs_dma;

     struct platform_device *pdev;

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index bea00a8d6c99..38d9d276ab8b 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -36,6 +36,7 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <net/ip.h>
+#include <net/tso.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/icmp.h>
@@ -228,6 +229,15 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
 #define FEC_PAUSE_FLAG_AUTONEG  0x1
 #define FEC_PAUSE_FLAG_ENABLE   0x2

+#define TSO_HEADER_SIZE     128
+/* Max number of allowed TCP segments for software TSO */
+#define FEC_MAX_TSO_SEGS    100
+#define FEC_MAX_SKB_DESCS   (FEC_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
+
+#define IS_TSO_HEADER(txq, addr) \
+    ((addr >= txq->tso_hdrs_dma) && \
+    (addr < txq->tso_hdrs_dma + txq->tx_ring_size * TSO_HEADER_SIZE))
+
 static int mii_cnt;

 static inline
@@ -438,8 +448,17 @@ static int fec_enet_txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
     unsigned short buflen;
     unsigned int estatus = 0;
     unsigned int index;
+    int entries_free;
     int ret;

+    entries_free = fec_enet_get_free_txdesc_num(fep);
+    if (entries_free < MAX_SKB_FRAGS + 1) {
+        dev_kfree_skb_any(skb);
+        if (net_ratelimit())
+            netdev_err(ndev, "NOT enough BD for SG!\n");
+        return NETDEV_TX_OK;
+    }
+
     /* Protocol checksum off-load for TCP and UDP. */
     if (fec_enet_clear_csum(skb, ndev)) {
         dev_kfree_skb_any(skb);
@@ -534,35 +553,210 @@ static int fec_enet_txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
     return 0;
 }

+static int
+fec_enet_txq_put_data_tso(struct sk_buff *skb, struct net_device *ndev,
+            struct bufdesc *bdp, int index, char *data,
+            int size, bool last_tcp, bool is_last)
+{
+    struct fec_enet_private *fep = netdev_priv(ndev);
+    const struct platform_device_id *id_entry =
+                platform_get_device_id(fep->pdev);
+    struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
+    unsigned short status;
+    unsigned int estatus = 0;
+
+    status = bdp->cbd_sc;
+    status &= ~BD_ENET_TX_STATS;
+
+    status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
+    bdp->cbd_datlen = size;
+
+    if (((unsigned long) data) & FEC_ALIGNMENT ||
+        id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
+        memcpy(fep->tx_bounce[index], data, size);
+        data = fep->tx_bounce[index];
+
+        if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+            swap_buffer(data, size);
+    }
+
+    bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, data,
+                    size, DMA_TO_DEVICE);
+    if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
+        dev_kfree_skb_any(skb);
+        if (net_ratelimit())
+            netdev_err(ndev, "Tx DMA memory map failed\n");
+        return NETDEV_TX_BUSY;
+    }
+
+    if (fep->bufdesc_ex) {
+        if (skb->ip_summed == CHECKSUM_PARTIAL)
+            estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
+        ebdp->cbd_bdu = 0;
+        ebdp->cbd_esc = estatus;
+    }
+
+    /* Handle the last BD specially */
+    if (last_tcp)
+        status |= (BD_ENET_TX_LAST | BD_ENET_TX_TC);
+    if (is_last) {
+        status |= BD_ENET_TX_INTR;
+        if (fep->bufdesc_ex)
+            ebdp->cbd_esc |= BD_ENET_TX_INT;
+    }
+
+    bdp->cbd_sc = status;
+
+    return 0;
+}
+
+static int
+fec_enet_txq_put_hdr_tso(struct sk_buff *skb, struct net_device *ndev,
+            struct bufdesc *bdp, int index)
+{
+    struct fec_enet_private *fep = netdev_priv(ndev);
+    const struct platform_device_id *id_entry =
+                platform_get_device_id(fep->pdev);
+    int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+    struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
+    void *bufaddr;
+    unsigned long dmabuf;
+    unsigned short status;
+    unsigned int estatus = 0;
+
+    status = bdp->cbd_sc;
+    status &= ~BD_ENET_TX_STATS;
+    status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
+
+    bufaddr = fep->tso_hdrs + index * TSO_HEADER_SIZE;
+    dmabuf = fep->tso_hdrs_dma + index * TSO_HEADER_SIZE;
+    if (((unsigned long) bufaddr) & FEC_ALIGNMENT ||
+        id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
+        memcpy(fep->tx_bounce[index], skb->data, hdr_len);
+        bufaddr = fep->tx_bounce[index];
+
+        if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+            swap_buffer(bufaddr, hdr_len);
+
+        dmabuf = dma_map_single(&fep->pdev->dev, bufaddr,
+                    hdr_len, DMA_TO_DEVICE);
+        if (dma_mapping_error(&fep->pdev->dev, dmabuf)) {
+            dev_kfree_skb_any(skb);
+            if (net_ratelimit())
+                netdev_err(ndev, "Tx DMA memory map failed\n");
+            return NETDEV_TX_BUSY;
+        }
+    }
+
+    bdp->cbd_bufaddr = dmabuf;
+    bdp->cbd_datlen = hdr_len;
+
+    if (fep->bufdesc_ex) {
+        if (skb->ip_summed == CHECKSUM_PARTIAL)
+            estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
+        ebdp->cbd_bdu = 0;
+        ebdp->cbd_esc = estatus;
+    }
+
+    bdp->cbd_sc = status;
+
+    return 0;
+}
+
+static int fec_enet_txq_submit_tso(struct sk_buff *skb, struct net_device *ndev)
+{
+    struct fec_enet_private *fep = netdev_priv(ndev);
+    int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+    int total_len, data_left;
+    struct bufdesc *bdp = fep->cur_tx;
+    struct tso_t tso;
+    unsigned int index = 0;
+    int ret;
+
+    if (tso_count_descs(skb) >= fec_enet_get_free_txdesc_num(fep)) {
+        dev_kfree_skb_any(skb);
+        if (net_ratelimit())
+            netdev_err(ndev, "NOT enough BD for TSO!\n");
+        return NETDEV_TX_OK;
+    }
+
+    /* Protocol checksum off-load for TCP and UDP. */
+    if (fec_enet_clear_csum(skb, ndev)) {
+        dev_kfree_skb_any(skb);
+        return NETDEV_TX_OK;
+    }
+
+    /* Initialize the TSO handler, and prepare the first payload */
+    tso_start(skb, &tso);
+
+    total_len = skb->len - hdr_len;
+    while (total_len > 0) {
+        char *hdr;
+
+        index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
+        data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
+        total_len -= data_left;
+
+        /* prepare packet headers: MAC + IP + TCP */
+        hdr = fep->tso_hdrs + index * TSO_HEADER_SIZE;
+        tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
+        ret = fec_enet_txq_put_hdr_tso(skb, ndev, bdp, index);
+        if (ret)
+            goto err_release;
+
+        while (data_left > 0) {
+            int size;
+
+            size = min_t(int, tso.size, data_left);
+            bdp = fec_enet_get_nextdesc(bdp, fep);
+            index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
+            ret = fec_enet_txq_put_data_tso(skb, ndev, bdp, index, tso.data,
+                            size, size == data_left,
+                            total_len == 0);
+            if (ret)
+                goto err_release;
+
+            data_left -= size;
+            tso_build_data(skb, &tso, size);
+        }
+
+        bdp = fec_enet_get_nextdesc(bdp, fep);
+    }
+
+    /* Save skb pointer */
+    fep->tx_skbuff[index] = skb;
+
+    fec_enet_submit_work(bdp, fep);
+
+    skb_tx_timestamp(skb);
+    fep->cur_tx = bdp;
+
+    /* Trigger transmission start */
+    writel(0, fep->hwp + FEC_X_DES_ACTIVE);
+
+    return 0;
+
+err_release:
+    /* TODO: Release all used data descriptors for TSO */
+    return ret;
+}
+
 static netdev_tx_t
 fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
     struct fec_enet_private *fep = netdev_priv(ndev);
-    struct bufdesc *bdp;
-    unsigned short status;
     int entries_free;
     int ret;

-    /* Fill in a Tx ring entry */
-    bdp = fep->cur_tx;
-
-    status = bdp->cbd_sc;
-
-    if (status & BD_ENET_TX_READY) {
-        /* Ooops.  All transmit buffers are full.  Bail out.
-         * This should not happen, since ndev->tbusy should be set.
-         */
-        if (net_ratelimit())
-            netdev_err(ndev, "tx queue full!\n");
-        return NETDEV_TX_BUSY;
-    }
-
-    ret = fec_enet_txq_submit_skb(skb, ndev);
+    if (skb_is_gso(skb))
+        ret = fec_enet_txq_submit_tso(skb, ndev);
+    else
+        ret = fec_enet_txq_submit_skb(skb, ndev);
     if (ret)
         return ret;

     entries_free = fec_enet_get_free_txdesc_num(fep);
-    if (entries_free < MAX_SKB_FRAGS + 1)
+    if (entries_free <= fep->tx_stop_threshold)
         netif_stop_queue(ndev);

     return NETDEV_TX_OK;
@@ -883,7 +1077,7 @@ fec_enet_tx(struct net_device *ndev)
     unsigned short status;
     struct sk_buff *skb;
     int index = 0;
-    int entries;
+    int entries_free;

     fep = netdev_priv(ndev);
     bdp = fep->dirty_tx;
@@ -900,8 +1094,9 @@ fec_enet_tx(struct net_device *ndev)
         index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);

         skb = fep->tx_skbuff[index];
-        dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, bdp->cbd_datlen,
-                DMA_TO_DEVICE);
+        if (!IS_TSO_HEADER(fep, bdp->cbd_bufaddr))
+            dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
+                    bdp->cbd_datlen, DMA_TO_DEVICE);
         bdp->cbd_bufaddr = 0;
         if (!skb) {
             bdp = fec_enet_get_nextdesc(bdp, fep);
@@ -962,9 +1157,11 @@ fec_enet_tx(struct net_device *ndev)

         /* Since we have freed up a buffer, the ring is no longer
          * full
          */
-        entries = fec_enet_get_free_txdesc_num(fep);
-        if (entries >= MAX_SKB_FRAGS + 1 && netif_queue_stopped(ndev))
-            netif_wake_queue(ndev);
+        if (netif_queue_stopped(ndev)) {
+            entries_free = fec_enet_get_free_txdesc_num(fep);
+            if (entries_free >= fep->tx_wake_threshold)
+                netif_wake_queue(ndev);
+        }
     }
     return;
 }
@@ -2166,6 +2363,9 @@ static int fec_enet_init(struct net_device *ndev)
     fep->tx_ring_size = TX_RING_SIZE;
     fep->rx_ring_size = RX_RING_SIZE;

+    fep->tx_stop_threshold = FEC_MAX_SKB_DESCS;
+    fep->tx_wake_threshold = (fep->tx_ring_size - fep->tx_stop_threshold) / 2;
+
     if (fep->bufdesc_ex)
         fep->bufdesc_size = sizeof(struct bufdesc_ex);
     else
@@ -2179,6 +2379,13 @@ static int fec_enet_init(struct net_device *ndev)
     if (!cbd_base)
         return -ENOMEM;

+    fep->tso_hdrs = dma_alloc_coherent(NULL, fep->tx_ring_size * TSO_HEADER_SIZE,
+                        &fep->tso_hdrs_dma, GFP_KERNEL);
+    if (!fep->tso_hdrs) {
+        dma_free_coherent(NULL, bd_size, cbd_base, fep->bd_dma);
+        return -ENOMEM;
+    }
+
     memset(cbd_base, 0, PAGE_SIZE);

     fep->netdev = ndev;
@@ -2209,9 +2416,11 @@ static int fec_enet_init(struct net_device *ndev)
         ndev->features |= NETIF_F_HW_VLAN_CTAG_RX;

     if (id_entry->driver_data & FEC_QUIRK_HAS_CSUM) {
+        ndev->gso_max_segs = FEC_MAX_TSO_SEGS;
+
         /* enable hw accelerator */
         ndev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
-                | NETIF_F_RXCSUM | NETIF_F_SG);
+                | NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_TSO);
         fep->csum_flags |= FLAG_RX_CSUM_ENABLED;
     }
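Aside (not part of the patch): the software-TSO path splits one large
skb into gso_size-sized segments, rebuilding a MAC+IP+TCP header for
each segment in the per-ring coherent buffer
(tso_hdrs + index * TSO_HEADER_SIZE) and chaining one or more data
descriptors behind every header descriptor. A standalone model of the
outer segmentation loop only (header length and MSS are illustrative;
in the driver the real work is done by tso_start(), tso_build_hdr()
and tso_build_data()):

    #include <stdio.h>

    int main(void)
    {
        int hdr_len = 54;        /* assumed MAC + IP + TCP header bytes */
        int gso_size = 1448;     /* assumed MSS */
        int total_len = 64000 - hdr_len;
        int segs = 0;

        while (total_len > 0) {
            /* one rebuilt header per segment ... */
            int data_left = total_len < gso_size ? total_len : gso_size;

            total_len -= data_left;
            segs++;  /* ... plus data descriptor(s) for data_left bytes */
        }

        /* ceil(63946 / 1448) = 45 segments, well under the 100-segment
         * FEC_MAX_TSO_SEGS cap advertised via gso_max_segs. */
        printf("%d segments\n", segs);
        return 0;
    }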