diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 4953c1494723..be43cd08027b 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -571,6 +571,7 @@ config VMXNET3
 	tristate "VMware VMXNET3 ethernet driver"
 	depends on PCI && INET
 	depends on PAGE_SIZE_LESS_THAN_64KB
+	select PAGE_POOL
 	help
 	  This driver supports VMware's vmxnet3 virtual ethernet NIC.
 	  To compile this driver as a module, choose M here: the
diff --git a/drivers/net/vmxnet3/Makefile b/drivers/net/vmxnet3/Makefile
index a666a88ac1ff..f82870c10205 100644
--- a/drivers/net/vmxnet3/Makefile
+++ b/drivers/net/vmxnet3/Makefile
@@ -32,4 +32,4 @@
 
 obj-$(CONFIG_VMXNET3) += vmxnet3.o
 
-vmxnet3-objs := vmxnet3_drv.o vmxnet3_ethtool.o
+vmxnet3-objs := vmxnet3_drv.o vmxnet3_ethtool.o vmxnet3_xdp.o
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 7fa74b8b2100..0578864792b6 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -28,6 +28,7 @@
 #include <net/ip6_checksum.h>
 
 #include "vmxnet3_int.h"
+#include "vmxnet3_xdp.h"
 
 char vmxnet3_driver_name[] = "vmxnet3";
 #define VMXNET3_DRIVER_DESC "VMware vmxnet3 virtual NIC driver"
@@ -338,14 +339,16 @@ static void
 vmxnet3_unmap_tx_buf(struct vmxnet3_tx_buf_info *tbi,
 		     struct pci_dev *pdev)
 {
-	if (tbi->map_type == VMXNET3_MAP_SINGLE)
+	u32 map_type = tbi->map_type;
+
+	if (map_type & VMXNET3_MAP_SINGLE)
 		dma_unmap_single(&pdev->dev, tbi->dma_addr, tbi->len,
 				 DMA_TO_DEVICE);
-	else if (tbi->map_type == VMXNET3_MAP_PAGE)
+	else if (map_type & VMXNET3_MAP_PAGE)
 		dma_unmap_page(&pdev->dev, tbi->dma_addr, tbi->len,
 			       DMA_TO_DEVICE);
 	else
-		BUG_ON(tbi->map_type != VMXNET3_MAP_NONE);
+		BUG_ON(map_type & ~VMXNET3_MAP_XDP);
 
 	tbi->map_type = VMXNET3_MAP_NONE; /* to help debugging */
 }
@@ -353,19 +356,20 @@ vmxnet3_unmap_tx_buf(struct vmxnet3_tx_buf_info *tbi,
 
 static int
 vmxnet3_unmap_pkt(u32 eop_idx, struct vmxnet3_tx_queue *tq,
-		  struct pci_dev *pdev, struct vmxnet3_adapter *adapter)
+		  struct pci_dev *pdev, struct vmxnet3_adapter *adapter,
+		  struct xdp_frame_bulk *bq)
 {
-	struct sk_buff *skb;
+	struct vmxnet3_tx_buf_info *tbi;
 	int entries = 0;
+	u32 map_type;
 
 	/* no out of order completion */
 	BUG_ON(tq->buf_info[eop_idx].sop_idx != tq->tx_ring.next2comp);
 	BUG_ON(VMXNET3_TXDESC_GET_EOP(&(tq->tx_ring.base[eop_idx].txd)) != 1);
 
-	skb = tq->buf_info[eop_idx].skb;
-	BUG_ON(skb == NULL);
-	tq->buf_info[eop_idx].skb = NULL;
-
+	tbi = &tq->buf_info[eop_idx];
+	BUG_ON(!tbi->skb);
+	map_type = tbi->map_type;
 	VMXNET3_INC_RING_IDX_ONLY(eop_idx, tq->tx_ring.size);
 
 	while (tq->tx_ring.next2comp != eop_idx) {
@@ -381,7 +385,14 @@ vmxnet3_unmap_pkt(u32 eop_idx, struct vmxnet3_tx_queue *tq,
 		entries++;
 	}
 
-	dev_kfree_skb_any(skb);
+	if (map_type & VMXNET3_MAP_XDP)
+		xdp_return_frame_bulk(tbi->xdpf, bq);
+	else
+		dev_kfree_skb_any(tbi->skb);
+
+	/* xdpf and skb are in an anonymous union. */
+	tbi->skb = NULL;
+
 	return entries;
 }
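
The TX-completion path above now distinguishes XDP frames from skbs and frees the former through the XDP bulk-return API. As a hedged sketch of that pattern (the helper and its frames/n parameters are illustrative, not part of the patch), bulk returns must run under the RCU read lock and be flushed at the end:

	static void example_bulk_return(struct xdp_frame **frames, int n)
	{
		struct xdp_frame_bulk bq;
		int i;

		xdp_frame_bulk_init(&bq);
		rcu_read_lock();		/* protects the mem-model lookup */

		for (i = 0; i < n; i++)
			xdp_return_frame_bulk(frames[i], &bq);

		xdp_flush_frame_bulk(&bq);	/* drain the partial batch */
		rcu_read_unlock();
	}

The same init/return/flush sequence appears in vmxnet3_tq_tx_complete() and vmxnet3_tq_cleanup() below.
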
@@ -390,8 +401,12 @@ static int
 vmxnet3_tq_tx_complete(struct vmxnet3_tx_queue *tq,
 		       struct vmxnet3_adapter *adapter)
 {
-	int completed = 0;
 	union Vmxnet3_GenericDesc *gdesc;
+	struct xdp_frame_bulk bq;
+	int completed = 0;
+
+	xdp_frame_bulk_init(&bq);
+
+	rcu_read_lock();
 
 	gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
 	while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) {
@@ -402,11 +417,13 @@ vmxnet3_tq_tx_complete(struct vmxnet3_tx_queue *tq,
 		completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX(
 					       &gdesc->tcd), tq, adapter->pdev,
-					       adapter);
+					       adapter, &bq);
 
 		vmxnet3_comp_ring_adv_next2proc(&tq->comp_ring);
 		gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
 	}
+	xdp_flush_frame_bulk(&bq);
+	rcu_read_unlock();
 
 	if (completed) {
 		spin_lock(&tq->tx_lock);
@@ -426,26 +443,36 @@ static void
 vmxnet3_tq_cleanup(struct vmxnet3_tx_queue *tq,
 		   struct vmxnet3_adapter *adapter)
 {
+	struct xdp_frame_bulk bq;
+	u32 map_type;
 	int i;
 
+	xdp_frame_bulk_init(&bq);
+	rcu_read_lock();
+
 	while (tq->tx_ring.next2comp != tq->tx_ring.next2fill) {
 		struct vmxnet3_tx_buf_info *tbi;
 
 		tbi = tq->buf_info + tq->tx_ring.next2comp;
 
+		map_type = tbi->map_type;
 		vmxnet3_unmap_tx_buf(tbi, adapter->pdev);
 		if (tbi->skb) {
-			dev_kfree_skb_any(tbi->skb);
+			if (map_type & VMXNET3_MAP_XDP)
+				xdp_return_frame_bulk(tbi->xdpf, &bq);
+			else
+				dev_kfree_skb_any(tbi->skb);
 			tbi->skb = NULL;
 		}
 		vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
 	}
 
-	/* sanity check, verify all buffers are indeed unmapped and freed */
-	for (i = 0; i < tq->tx_ring.size; i++) {
-		BUG_ON(tq->buf_info[i].skb != NULL ||
-		       tq->buf_info[i].map_type != VMXNET3_MAP_NONE);
-	}
+	xdp_flush_frame_bulk(&bq);
+	rcu_read_unlock();
+
+	/* sanity check, verify all buffers are indeed unmapped */
+	for (i = 0; i < tq->tx_ring.size; i++)
+		BUG_ON(tq->buf_info[i].map_type != VMXNET3_MAP_NONE);
 
 	tq->tx_ring.gen = VMXNET3_INIT_GEN;
 	tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
@@ -599,7 +626,17 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
 		gd = ring->base + ring->next2fill;
 		rbi->comp_state = VMXNET3_RXD_COMP_PENDING;
 
-		if (rbi->buf_type == VMXNET3_RX_BUF_SKB) {
+		if (rbi->buf_type == VMXNET3_RX_BUF_XDP) {
+			void *data = vmxnet3_pp_get_buff(rq->page_pool,
+							 &rbi->dma_addr,
+							 GFP_KERNEL);
+			if (!data) {
+				rq->stats.rx_buf_alloc_failure++;
+				break;
+			}
+			rbi->page = virt_to_page(data);
+			val = VMXNET3_RXD_BTYPE_HEAD << VMXNET3_RXD_BTYPE_SHIFT;
+		} else if (rbi->buf_type == VMXNET3_RX_BUF_SKB) {
 			if (rbi->skb == NULL) {
 				rbi->skb = __netdev_alloc_skb_ip_align(adapter->netdev,
 								       rbi->len,
@@ -1263,6 +1300,63 @@ drop_pkt:
 	return NETDEV_TX_OK;
 }
 
+static int
+vmxnet3_create_pp(struct vmxnet3_adapter *adapter,
+		  struct vmxnet3_rx_queue *rq, int size)
+{
+	bool xdp_prog = vmxnet3_xdp_enabled(adapter);
+	const struct page_pool_params pp_params = {
+		.order = 0,
+		.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
+		.pool_size = size,
+		.nid = NUMA_NO_NODE,
+		.dev = &adapter->pdev->dev,
+		.offset = VMXNET3_XDP_RX_OFFSET,
+		.max_len = VMXNET3_XDP_MAX_FRSIZE,
+		.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE,
+	};
+	struct page_pool *pp;
+	int err;
+
+	pp = page_pool_create(&pp_params);
+	if (IS_ERR(pp))
+		return PTR_ERR(pp);
+
+	err = xdp_rxq_info_reg(&rq->xdp_rxq, adapter->netdev, rq->qid,
+			       rq->napi.napi_id);
+	if (err < 0)
+		goto err_free_pp;
+
+	err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, MEM_TYPE_PAGE_POOL, pp);
+	if (err)
+		goto err_unregister_rxq;
+
+	rq->page_pool = pp;
+
+	return 0;
+
+err_unregister_rxq:
+	xdp_rxq_info_unreg(&rq->xdp_rxq);
+err_free_pp:
+	page_pool_destroy(pp);
+
+	return err;
+}
+
+void *
+vmxnet3_pp_get_buff(struct page_pool *pp, dma_addr_t *dma_addr,
+		    gfp_t gfp_mask)
+{
+	struct page *page;
+
+	page = page_pool_alloc_pages(pp, gfp_mask | __GFP_NOWARN);
+	if (unlikely(!page))
+		return NULL;
+
+	*dma_addr = page_pool_get_dma_addr(page) + pp->p.offset;
+
+	return page_address(page);
+}
 
 static netdev_tx_t
 vmxnet3_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
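
The pool parameters in vmxnet3_create_pp() tile each page as headroom, frame data, and skb_shared_info tailroom, and switch the mapping to DMA_BIDIRECTIONAL once a program is attached because XDP_TX transmits straight out of the RX page. A minimal sketch of the layout invariant, using the macros from vmxnet3_xdp.h (the assertion placement is illustrative):

	/* [ VMXNET3_XDP_RX_OFFSET | frame <= MAX_FRSIZE | RX_TAILROOM ] == one page */
	static_assert(VMXNET3_XDP_RX_OFFSET + VMXNET3_XDP_MAX_FRSIZE +
		      VMXNET3_XDP_RX_TAILROOM == PAGE_SIZE);
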
@@ -1423,6 +1517,8 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
 	struct Vmxnet3_RxDesc rxCmdDesc;
 	struct Vmxnet3_RxCompDesc rxComp;
 #endif
+	bool need_flush = false;
+
 	vmxnet3_getRxComp(rcd,
 			  &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, &rxComp);
 	while (rcd->gen == rq->comp_ring.gen) {
@@ -1463,6 +1559,31 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
 			goto rcd_done;
 		}
 
+		if (rcd->sop && rcd->eop && vmxnet3_xdp_enabled(adapter)) {
+			struct sk_buff *skb_xdp_pass;
+			int act;
+
+			if (VMXNET3_RX_DATA_RING(adapter, rcd->rqID)) {
+				ctx->skb = NULL;
+				goto skip_xdp; /* Handle it later. */
+			}
+
+			if (rbi->buf_type != VMXNET3_RX_BUF_XDP)
+				goto rcd_done;
+
+			act = vmxnet3_process_xdp(adapter, rq, rcd, rbi, rxd,
+						  &skb_xdp_pass);
+			if (act == XDP_PASS) {
+				ctx->skb = skb_xdp_pass;
+				goto sop_done;
+			}
+			ctx->skb = NULL;
+			need_flush |= act == XDP_REDIRECT;
+
+			goto rcd_done;
+		}
+skip_xdp:
+
 		if (rcd->sop) { /* first buf of the pkt */
 			bool rxDataRingUsed;
 			u16 len;
@@ -1471,7 +1592,8 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
 			       (rcd->rqID != rq->qid &&
 				rcd->rqID != rq->dataRingQid));
 
-			BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_SKB);
+			BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_SKB &&
+			       rbi->buf_type != VMXNET3_RX_BUF_XDP);
 			BUG_ON(ctx->skb != NULL || rbi->skb == NULL);
 
 			if (unlikely(rcd->len == 0)) {
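
need_flush above implements the standard redirect contract: xdp_do_redirect() only queues a frame, and the driver owes a single xdp_do_flush() before its poll routine returns (added at the end of vmxnet3_rq_rx_complete() below). A hedged sketch, with have_rx_work()/handle_one_rx() as stand-ins for the completion-ring walk:

	static int example_poll(int budget)
	{
		bool need_flush = false;
		int work = 0;

		while (work < budget && have_rx_work()) {
			if (handle_one_rx() == XDP_REDIRECT)
				need_flush = true;
			work++;
		}
		if (need_flush)
			xdp_do_flush();	/* once per poll, not per packet */
		return work;
	}
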
@@ -1489,6 +1611,25 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
 			rxDataRingUsed =
 				VMXNET3_RX_DATA_RING(adapter, rcd->rqID);
 			len = rxDataRingUsed ? rcd->len : rbi->len;
+
+			if (rxDataRingUsed && vmxnet3_xdp_enabled(adapter)) {
+				struct sk_buff *skb_xdp_pass;
+				size_t sz;
+				int act;
+
+				sz = rcd->rxdIdx * rq->data_ring.desc_size;
+				act = vmxnet3_process_xdp_small(adapter, rq,
+								&rq->data_ring.base[sz],
+								rcd->len,
+								&skb_xdp_pass);
+				if (act == XDP_PASS) {
+					ctx->skb = skb_xdp_pass;
+					goto sop_done;
+				}
+				need_flush |= act == XDP_REDIRECT;
+
+				goto rcd_done;
+			}
+
 			new_skb = netdev_alloc_skb_ip_align(adapter->netdev,
 							    len);
 			if (new_skb == NULL) {
@@ -1621,6 +1762,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
 		}
 
+sop_done:
 		skb = ctx->skb;
 		if (rcd->eop) {
 			u32 mtu = adapter->netdev->mtu;
@@ -1757,6 +1899,8 @@ refill_buf:
 		vmxnet3_getRxComp(rcd,
 				  &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, &rxComp);
 	}
+	if (need_flush)
+		xdp_do_flush();
 
 	return num_pkts;
 }
@@ -1775,24 +1919,32 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
 
 	for (ring_idx = 0; ring_idx < 2; ring_idx++) {
 		for (i = 0; i < rq->rx_ring[ring_idx].size; i++) {
+			struct vmxnet3_rx_buf_info *rbi;
 #ifdef __BIG_ENDIAN_BITFIELD
 			struct Vmxnet3_RxDesc rxDesc;
 #endif
+
+			rbi = &rq->buf_info[ring_idx][i];
 			vmxnet3_getRxDesc(rxd,
 					  &rq->rx_ring[ring_idx].base[i].rxd, &rxDesc);
 
 			if (rxd->btype == VMXNET3_RXD_BTYPE_HEAD &&
-			    rq->buf_info[ring_idx][i].skb) {
+			    rbi->page && rbi->buf_type == VMXNET3_RX_BUF_XDP) {
+				page_pool_recycle_direct(rq->page_pool,
+							 rbi->page);
+				rbi->page = NULL;
+			} else if (rxd->btype == VMXNET3_RXD_BTYPE_HEAD &&
+				   rbi->skb) {
 				dma_unmap_single(&adapter->pdev->dev, rxd->addr,
 						 rxd->len, DMA_FROM_DEVICE);
-				dev_kfree_skb(rq->buf_info[ring_idx][i].skb);
-				rq->buf_info[ring_idx][i].skb = NULL;
+				dev_kfree_skb(rbi->skb);
+				rbi->skb = NULL;
 			} else if (rxd->btype == VMXNET3_RXD_BTYPE_BODY &&
-				   rq->buf_info[ring_idx][i].page) {
+				   rbi->page) {
 				dma_unmap_page(&adapter->pdev->dev, rxd->addr,
 					       rxd->len, DMA_FROM_DEVICE);
-				put_page(rq->buf_info[ring_idx][i].page);
-				rq->buf_info[ring_idx][i].page = NULL;
+				put_page(rbi->page);
+				rbi->page = NULL;
 			}
 		}
@@ -1813,6 +1965,7 @@ vmxnet3_rq_cleanup_all(struct vmxnet3_adapter *adapter)
 
 	for (i = 0; i < adapter->num_rx_queues; i++)
 		vmxnet3_rq_cleanup(&adapter->rx_queue[i], adapter);
+	rcu_assign_pointer(adapter->xdp_bpf_prog, NULL);
 }
@@ -1842,6 +1995,11 @@ static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
 		}
 	}
 
+	if (xdp_rxq_info_is_reg(&rq->xdp_rxq))
+		xdp_rxq_info_unreg(&rq->xdp_rxq);
+	page_pool_destroy(rq->page_pool);
+	rq->page_pool = NULL;
+
 	if (rq->data_ring.base) {
 		dma_free_coherent(&adapter->pdev->dev,
 				  rq->rx_ring[0].size * rq->data_ring.desc_size,
@@ -1885,14 +2043,16 @@ static int
 vmxnet3_rq_init(struct vmxnet3_rx_queue *rq,
 		struct vmxnet3_adapter *adapter)
 {
-	int i;
+	int i, err;
 
 	/* initialize buf_info */
 	for (i = 0; i < rq->rx_ring[0].size; i++) {
 
-		/* 1st buf for a pkt is skbuff */
+		/* 1st buf for a pkt is skbuff or xdp page */
 		if (i % adapter->rx_buf_per_pkt == 0) {
-			rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_SKB;
+			rq->buf_info[0][i].buf_type = vmxnet3_xdp_enabled(adapter) ?
+						      VMXNET3_RX_BUF_XDP :
+						      VMXNET3_RX_BUF_SKB;
 			rq->buf_info[0][i].len = adapter->skb_buf_size;
 		} else { /* subsequent bufs for a pkt is frag */
 			rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_PAGE;
@@ -1913,8 +2073,18 @@ vmxnet3_rq_init(struct vmxnet3_rx_queue *rq,
 		rq->rx_ring[i].gen = VMXNET3_INIT_GEN;
 		rq->rx_ring[i].isOutOfOrder = 0;
 	}
+
+	err = vmxnet3_create_pp(adapter, rq,
+				rq->rx_ring[0].size + rq->rx_ring[1].size);
+	if (err)
+		return err;
+
 	if (vmxnet3_rq_alloc_rx_buf(rq, 0, rq->rx_ring[0].size - 1,
 				    adapter) == 0) {
+		xdp_rxq_info_unreg(&rq->xdp_rxq);
+		page_pool_destroy(rq->page_pool);
+		rq->page_pool = NULL;
+
 		/* at least has 1 rx buffer for the 1st ring */
 		return -ENOMEM;
 	}
@@ -2016,7 +2186,7 @@ err:
 }
 
 
-static int
+int
 vmxnet3_rq_create_all(struct vmxnet3_adapter *adapter)
 {
 	int i, err = 0;
@@ -3053,7 +3223,7 @@ vmxnet3_free_pci_resources(struct vmxnet3_adapter *adapter)
 }
 
 
-static void
+void
 vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter)
 {
 	size_t sz, i, ring0_size, ring1_size, comp_size;
@@ -3612,6 +3782,8 @@ vmxnet3_probe_device(struct pci_dev *pdev,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 		.ndo_poll_controller = vmxnet3_netpoll,
 #endif
+		.ndo_bpf = vmxnet3_xdp,
+		.ndo_xdp_xmit = vmxnet3_xdp_xmit,
 	};
 	int err;
 	u32 ver;
@@ -3864,6 +4036,8 @@ vmxnet3_probe_device(struct pci_dev *pdev,
 
 	SET_NETDEV_DEV(netdev, &pdev->dev);
 	vmxnet3_declare_features(adapter);
+	netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+			       NETDEV_XDP_ACT_NDO_XMIT;
 
 	adapter->rxdata_desc_size = VMXNET3_VERSION_GE_3(adapter) ?
 		VMXNET3_DEF_RXDATA_DESC_SIZE : 0;
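
vmxnet3_rq_create_all() and vmxnet3_adjust_rx_ring_size() lose their static qualifiers because buf_type is fixed at vmxnet3_rq_init() time, so attaching or detaching a program forces a full RX-queue rebuild from vmxnet3_xdp.c. Roughly, mirroring vmxnet3_xdp_set() below (a hedged sketch with error handling elided):

	static int example_rebuild_rx(struct vmxnet3_adapter *adapter)
	{
		vmxnet3_quiesce_dev(adapter);
		vmxnet3_reset_dev(adapter);
		vmxnet3_rq_destroy_all(adapter);
		vmxnet3_adjust_rx_ring_size(adapter);
		if (vmxnet3_rq_create_all(adapter))
			return -ENOMEM;
		return vmxnet3_activate_dev(adapter);
	}
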
diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c
index 18cf7c723201..98c22d7d87a2 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c
@@ -28,6 +28,7 @@
 #include "vmxnet3_int.h"
 #include <net/vxlan.h>
 #include <net/geneve.h>
+#include "vmxnet3_xdp.h"
 
 #define VXLAN_UDP_PORT 8472
@@ -76,6 +77,10 @@ vmxnet3_tq_driver_stats[] = {
 					   copy_skb_header) },
 	{ " giant hdr", offsetof(struct vmxnet3_tq_driver_stats,
 				 oversized_hdr) },
+	{ " xdp xmit", offsetof(struct vmxnet3_tq_driver_stats,
+				xdp_xmit) },
+	{ " xdp xmit err", offsetof(struct vmxnet3_tq_driver_stats,
+				    xdp_xmit_err) },
 };
 
 /* per rq stats maintained by the device */
@@ -106,6 +111,16 @@ vmxnet3_rq_driver_stats[] = {
 					drop_fcs) },
 	{ " rx buf alloc fail", offsetof(struct vmxnet3_rq_driver_stats,
 					 rx_buf_alloc_failure) },
+	{ " xdp packets", offsetof(struct vmxnet3_rq_driver_stats,
+				   xdp_packets) },
+	{ " xdp tx", offsetof(struct vmxnet3_rq_driver_stats,
+			      xdp_tx) },
+	{ " xdp redirects", offsetof(struct vmxnet3_rq_driver_stats,
+				     xdp_redirects) },
+	{ " xdp drops", offsetof(struct vmxnet3_rq_driver_stats,
+				 xdp_drops) },
+	{ " xdp aborted", offsetof(struct vmxnet3_rq_driver_stats,
+				   xdp_aborted) },
 };
 
 /* global stats maintained by the driver */
@@ -249,10 +264,18 @@ vmxnet3_get_strings(struct net_device *netdev, u32 stringset, u8 *buf)
 netdev_features_t vmxnet3_fix_features(struct net_device *netdev,
 				       netdev_features_t features)
 {
+	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
+
 	/* If Rx checksum is disabled, then LRO should also be disabled */
 	if (!(features & NETIF_F_RXCSUM))
 		features &= ~NETIF_F_LRO;
 
+	/* If XDP is enabled, then LRO should not be enabled */
+	if (vmxnet3_xdp_enabled(adapter) && (features & NETIF_F_LRO)) {
+		netdev_err(netdev, "LRO is not supported with XDP");
+		features &= ~NETIF_F_LRO;
+	}
+
 	return features;
 }
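
The new counters reuse the existing string/offset tables, so they surface through ethtool -S with no extra plumbing: each entry names one u64 field, and the stats loop copies it out by offset. Illustrative accessor (a simplified stand-in for the driver's get_ethtool_stats() loop, not code from the patch):

	static u64 example_read_stat(const void *stats, size_t off)
	{
		return *(const u64 *)((const u8 *)stats + off);
	}

	/* e.g. example_read_stat(&rq->stats,
	 *	 offsetof(struct vmxnet3_rq_driver_stats, xdp_drops)); */
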
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index 3367db23aa13..915aaf18c409 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -56,6 +56,9 @@
 #include <linux/if_arp.h>
 #include <linux/inetdevice.h>
 #include <linux/log2.h>
+#include <linux/bpf.h>
+#include <net/page_pool.h>
+#include <net/xdp.h>
 
 #include "vmxnet3_defs.h"
@@ -188,19 +191,20 @@ struct vmxnet3_tx_data_ring {
 	dma_addr_t basePA;
 };
 
-enum vmxnet3_buf_map_type {
-	VMXNET3_MAP_INVALID = 0,
-	VMXNET3_MAP_NONE,
-	VMXNET3_MAP_SINGLE,
-	VMXNET3_MAP_PAGE,
-};
+#define VMXNET3_MAP_NONE	0
+#define VMXNET3_MAP_SINGLE	BIT(0)
+#define VMXNET3_MAP_PAGE	BIT(1)
+#define VMXNET3_MAP_XDP		BIT(2)
 
 struct vmxnet3_tx_buf_info {
 	u32 map_type;
 	u16 len;
 	u16 sop_idx;
 	dma_addr_t dma_addr;
-	struct sk_buff *skb;
+	union {
+		struct sk_buff *skb;
+		struct xdp_frame *xdpf;
+	};
 };
@@ -217,6 +221,9 @@ struct vmxnet3_tq_driver_stats {
 	u64 linearized;		/* # of pkts linearized */
 	u64 copy_skb_header;	/* # of times we have to copy skb header */
 	u64 oversized_hdr;
+
+	u64 xdp_xmit;
+	u64 xdp_xmit_err;
 };
 
 struct vmxnet3_tx_ctx {
@@ -253,12 +260,13 @@ struct vmxnet3_tx_queue {
 					  * stopped */
 	int qid;
 	u16 txdata_desc_size;
-} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+} ____cacheline_aligned;
 
 enum vmxnet3_rx_buf_type {
 	VMXNET3_RX_BUF_NONE = 0,
 	VMXNET3_RX_BUF_SKB = 1,
-	VMXNET3_RX_BUF_PAGE = 2
+	VMXNET3_RX_BUF_PAGE = 2,
+	VMXNET3_RX_BUF_XDP = 3,
 };
 
 #define VMXNET3_RXD_COMP_PENDING 0
@@ -285,6 +293,12 @@ struct vmxnet3_rq_driver_stats {
 	u64 drop_err;
 	u64 drop_fcs;
 	u64 rx_buf_alloc_failure;
+
+	u64 xdp_packets;	/* Total packets processed by XDP. */
+	u64 xdp_tx;
+	u64 xdp_redirects;
+	u64 xdp_drops;
+	u64 xdp_aborted;
 };
 
 struct vmxnet3_rx_data_ring {
@@ -307,7 +321,9 @@ struct vmxnet3_rx_queue {
 	struct vmxnet3_rx_buf_info *buf_info[2];
 	struct Vmxnet3_RxQueueCtrl *shared;
 	struct vmxnet3_rq_driver_stats stats;
-} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+	struct page_pool *page_pool;
+	struct xdp_rxq_info xdp_rxq;
+} ____cacheline_aligned;
 
 #define VMXNET3_DEVICE_MAX_TX_QUEUES 32
 #define VMXNET3_DEVICE_MAX_RX_QUEUES 32   /* Keep this value as a power of 2 */
@@ -415,6 +431,7 @@ struct vmxnet3_adapter {
 	u16 tx_prod_offset;
 	u16 rx_prod_offset;
 	u16 rx_prod2_offset;
+	struct bpf_prog __rcu *xdp_bpf_prog;
 };
 
 #define VMXNET3_WRITE_BAR0_REG(adapter, reg, val)  \
@@ -490,6 +507,12 @@ vmxnet3_tq_destroy_all(struct vmxnet3_adapter *adapter);
 void
 vmxnet3_rq_destroy_all(struct vmxnet3_adapter *adapter);
 
+int
+vmxnet3_rq_create_all(struct vmxnet3_adapter *adapter);
+
+void
+vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter);
+
 netdev_features_t
 vmxnet3_fix_features(struct net_device *netdev, netdev_features_t features);
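
Turning vmxnet3_buf_map_type from an enum into bit flags matters because one TX buffer can now be two things at once: a frame injected through ndo_xdp_xmit is VMXNET3_MAP_XDP | VMXNET3_MAP_SINGLE (freed via xdp_return_frame, unmapped via dma_unmap_single), while an XDP_TX frame is VMXNET3_MAP_XDP alone since its page_pool page is already mapped. That is also why vmxnet3_unmap_tx_buf() can assert map_type & ~VMXNET3_MAP_XDP in its else branch: once SINGLE and PAGE are ruled out, only the XDP bit (or nothing) may remain. A hedged sketch of the two legal combinations:

	static void example_mark(struct vmxnet3_tx_buf_info *tbi, bool dma_map)
	{
		tbi->map_type = VMXNET3_MAP_XDP;
		if (dma_map)				/* ndo_xdp_xmit path */
			tbi->map_type |= VMXNET3_MAP_SINGLE;
		/* XDP_TX path: page_pool page, no extra mapping bit needed */
	}
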
diff --git a/drivers/net/vmxnet3/vmxnet3_xdp.c b/drivers/net/vmxnet3/vmxnet3_xdp.c
new file mode 100644
index 000000000000..80ddaff759d4
--- /dev/null
+++ b/drivers/net/vmxnet3/vmxnet3_xdp.c
@@ -0,0 +1,419 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Linux driver for VMware's vmxnet3 ethernet NIC.
+ * Copyright (C) 2008-2023, VMware, Inc. All Rights Reserved.
+ * Maintained by: pv-drivers@vmware.com
+ *
+ */
+
+#include "vmxnet3_int.h"
+#include "vmxnet3_xdp.h"
+
+static void
+vmxnet3_xdp_exchange_program(struct vmxnet3_adapter *adapter,
+			     struct bpf_prog *prog)
+{
+	rcu_assign_pointer(adapter->xdp_bpf_prog, prog);
+}
+
+static inline struct vmxnet3_tx_queue *
+vmxnet3_xdp_get_tq(struct vmxnet3_adapter *adapter)
+{
+	struct vmxnet3_tx_queue *tq;
+	int tq_number;
+	int cpu;
+
+	tq_number = adapter->num_tx_queues;
+	cpu = smp_processor_id();
+	if (likely(cpu < tq_number))
+		tq = &adapter->tx_queue[cpu];
+	else
+		tq = &adapter->tx_queue[reciprocal_scale(cpu, tq_number)];
+
+	return tq;
+}
+
+static int
+vmxnet3_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf,
+		struct netlink_ext_ack *extack)
+{
+	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
+	struct bpf_prog *new_bpf_prog = bpf->prog;
+	struct bpf_prog *old_bpf_prog;
+	bool need_update;
+	bool running;
+	int err;
+
+	if (new_bpf_prog && netdev->mtu > VMXNET3_XDP_MAX_MTU) {
+		NL_SET_ERR_MSG_FMT_MOD(extack, "MTU %u too large for XDP",
+				       netdev->mtu);
+		return -EOPNOTSUPP;
+	}
+
+	if (adapter->netdev->features & NETIF_F_LRO) {
+		NL_SET_ERR_MSG_MOD(extack, "LRO is not supported with XDP");
+		adapter->netdev->features &= ~NETIF_F_LRO;
+	}
+
+	old_bpf_prog = rcu_dereference(adapter->xdp_bpf_prog);
+	if (!new_bpf_prog && !old_bpf_prog)
+		return 0;
+
+	running = netif_running(netdev);
+	need_update = !!old_bpf_prog != !!new_bpf_prog;
+
+	if (running && need_update)
+		vmxnet3_quiesce_dev(adapter);
+
+	vmxnet3_xdp_exchange_program(adapter, new_bpf_prog);
+	if (old_bpf_prog)
+		bpf_prog_put(old_bpf_prog);
+
+	if (!running || !need_update)
+		return 0;
+
+	if (new_bpf_prog)
+		xdp_features_set_redirect_target(netdev, false);
+	else
+		xdp_features_clear_redirect_target(netdev);
+
+	vmxnet3_reset_dev(adapter);
+	vmxnet3_rq_destroy_all(adapter);
+	vmxnet3_adjust_rx_ring_size(adapter);
+	err = vmxnet3_rq_create_all(adapter);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "failed to re-create rx queues for XDP.");
+		return -EOPNOTSUPP;
+	}
+	err = vmxnet3_activate_dev(adapter);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "failed to activate device for XDP.");
+		return -EOPNOTSUPP;
+	}
+	clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
+
+	return 0;
+}
+
+/* This is the main XDP entry point used by the kernel to set or unset
+ * an eBPF program.
+ */
+int
+vmxnet3_xdp(struct net_device *netdev, struct netdev_bpf *bpf)
+{
+	switch (bpf->command) {
+	case XDP_SETUP_PROG:
+		return vmxnet3_xdp_set(netdev, bpf, bpf->extack);
+	default:
+		return -EINVAL;
+	}
+}
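
For reference, a minimal user-space sequence that exercises the new ndo_bpf hook (libbpf assumed; the object file and interface name are hypothetical). The MTU check in vmxnet3_xdp_set() makes the attach fail with -EOPNOTSUPP when the MTU exceeds VMXNET3_XDP_MAX_MTU:

	#include <bpf/libbpf.h>
	#include <linux/if_link.h>
	#include <net/if.h>

	int main(void)
	{
		struct bpf_object *obj = bpf_object__open_file("xdp_pass.bpf.o", NULL);
		struct bpf_program *prog;
		int ifindex = if_nametoindex("ens192");	/* hypothetical NIC */

		if (!obj || bpf_object__load(obj))
			return 1;
		prog = bpf_object__next_program(obj, NULL);
		return bpf_xdp_attach(ifindex, bpf_program__fd(prog),
				      XDP_FLAGS_DRV_MODE, NULL) ? 1 : 0;
	}
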
*/ +int +vmxnet3_xdp(struct net_device *netdev, struct netdev_bpf *bpf) +{ + switch (bpf->command) { + case XDP_SETUP_PROG: + return vmxnet3_xdp_set(netdev, bpf, bpf->extack); + default: + return -EINVAL; + } + + return 0; +} + +static int +vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter, + struct xdp_frame *xdpf, + struct vmxnet3_tx_queue *tq, bool dma_map) +{ + struct vmxnet3_tx_buf_info *tbi = NULL; + union Vmxnet3_GenericDesc *gdesc; + struct vmxnet3_tx_ctx ctx; + int tx_num_deferred; + struct page *page; + u32 buf_size; + u32 dw2; + + dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT; + dw2 |= xdpf->len; + ctx.sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill; + gdesc = ctx.sop_txd; + + buf_size = xdpf->len; + tbi = tq->buf_info + tq->tx_ring.next2fill; + + if (vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) == 0) { + tq->stats.tx_ring_full++; + return -ENOSPC; + } + + tbi->map_type = VMXNET3_MAP_XDP; + if (dma_map) { /* ndo_xdp_xmit */ + tbi->dma_addr = dma_map_single(&adapter->pdev->dev, + xdpf->data, buf_size, + DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) + return -EFAULT; + tbi->map_type |= VMXNET3_MAP_SINGLE; + } else { /* XDP buffer from page pool */ + page = virt_to_page(xdpf->data); + tbi->dma_addr = page_pool_get_dma_addr(page) + + VMXNET3_XDP_HEADROOM; + dma_sync_single_for_device(&adapter->pdev->dev, + tbi->dma_addr, buf_size, + DMA_TO_DEVICE); + } + tbi->xdpf = xdpf; + tbi->len = buf_size; + + gdesc = tq->tx_ring.base + tq->tx_ring.next2fill; + WARN_ON_ONCE(gdesc->txd.gen == tq->tx_ring.gen); + + gdesc->txd.addr = cpu_to_le64(tbi->dma_addr); + gdesc->dword[2] = cpu_to_le32(dw2); + + /* Setup the EOP desc */ + gdesc->dword[3] = cpu_to_le32(VMXNET3_TXD_CQ | VMXNET3_TXD_EOP); + + gdesc->txd.om = 0; + gdesc->txd.msscof = 0; + gdesc->txd.hlen = 0; + gdesc->txd.ti = 0; + + tx_num_deferred = le32_to_cpu(tq->shared->txNumDeferred); + le32_add_cpu(&tq->shared->txNumDeferred, 1); + tx_num_deferred++; + + vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring); + + /* set the last buf_info for the pkt */ + tbi->sop_idx = ctx.sop_txd - tq->tx_ring.base; + + dma_wmb(); + gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^ + VMXNET3_TXD_GEN); + + /* No need to handle the case when tx_num_deferred doesn't reach + * threshold. Backend driver at hypervisor side will poll and reset + * tq->shared->txNumDeferred to 0. 
+
+static int
+vmxnet3_xdp_xmit_back(struct vmxnet3_adapter *adapter,
+		      struct xdp_frame *xdpf)
+{
+	struct vmxnet3_tx_queue *tq;
+	struct netdev_queue *nq;
+	int err;
+
+	tq = vmxnet3_xdp_get_tq(adapter);
+	if (tq->stopped)
+		return -ENETDOWN;
+
+	nq = netdev_get_tx_queue(adapter->netdev, tq->qid);
+
+	__netif_tx_lock(nq, smp_processor_id());
+	err = vmxnet3_xdp_xmit_frame(adapter, xdpf, tq, false);
+	__netif_tx_unlock(nq);
+
+	return err;
+}
+
+/* ndo_xdp_xmit */
+int
+vmxnet3_xdp_xmit(struct net_device *dev,
+		 int n, struct xdp_frame **frames, u32 flags)
+{
+	struct vmxnet3_adapter *adapter = netdev_priv(dev);
+	struct vmxnet3_tx_queue *tq;
+	int i;
+
+	if (unlikely(test_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state)))
+		return -ENETDOWN;
+	if (unlikely(test_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state)))
+		return -EINVAL;
+
+	tq = vmxnet3_xdp_get_tq(adapter);
+	if (tq->stopped)
+		return -ENETDOWN;
+
+	for (i = 0; i < n; i++) {
+		if (vmxnet3_xdp_xmit_frame(adapter, frames[i], tq, true)) {
+			tq->stats.xdp_xmit_err++;
+			break;
+		}
+	}
+	tq->stats.xdp_xmit += i;
+
+	return i;
+}
+
+static int
+vmxnet3_run_xdp(struct vmxnet3_rx_queue *rq, struct xdp_buff *xdp,
+		struct bpf_prog *prog)
+{
+	struct xdp_frame *xdpf;
+	struct page *page;
+	int err;
+	u32 act;
+
+	rq->stats.xdp_packets++;
+	act = bpf_prog_run_xdp(prog, xdp);
+	page = virt_to_page(xdp->data_hard_start);
+
+	switch (act) {
+	case XDP_PASS:
+		return act;
+	case XDP_REDIRECT:
+		err = xdp_do_redirect(rq->adapter->netdev, xdp, prog);
+		if (!err) {
+			rq->stats.xdp_redirects++;
+		} else {
+			rq->stats.xdp_drops++;
+			page_pool_recycle_direct(rq->page_pool, page);
+		}
+		return act;
+	case XDP_TX:
+		xdpf = xdp_convert_buff_to_frame(xdp);
+		if (unlikely(!xdpf ||
+			     vmxnet3_xdp_xmit_back(rq->adapter, xdpf))) {
+			rq->stats.xdp_drops++;
+			page_pool_recycle_direct(rq->page_pool, page);
+		} else {
+			rq->stats.xdp_tx++;
+		}
+		return act;
+	default:
+		bpf_warn_invalid_xdp_action(rq->adapter->netdev, prog, act);
+		fallthrough;
+	case XDP_ABORTED:
+		trace_xdp_exception(rq->adapter->netdev, prog, act);
+		rq->stats.xdp_aborted++;
+		break;
+	case XDP_DROP:
+		rq->stats.xdp_drops++;
+		break;
+	}
+
+	page_pool_recycle_direct(rq->page_pool, page);
+
+	return act;
+}
+
+static struct sk_buff *
+vmxnet3_build_skb(struct vmxnet3_rx_queue *rq, struct page *page,
+		  const struct xdp_buff *xdp)
+{
+	struct sk_buff *skb;
+
+	skb = build_skb(page_address(page), PAGE_SIZE);
+	if (unlikely(!skb)) {
+		page_pool_recycle_direct(rq->page_pool, page);
+		rq->stats.rx_buf_alloc_failure++;
+		return NULL;
+	}
+
+	/* bpf prog might change len and data position. */
+	skb_reserve(skb, xdp->data - xdp->data_hard_start);
+	skb_put(skb, xdp->data_end - xdp->data);
+	skb_mark_for_recycle(skb);
+
+	return skb;
+}
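
vmxnet3_run_xdp() and vmxnet3_build_skb() split page ownership by verdict: XDP_PASS keeps the page and wraps it in an skb whose pages return to the pool on free (what skb_mark_for_recycle() buys), XDP_TX keeps it mapped until TX completion reclaims it, a successful XDP_REDIRECT hands it to the core, and every drop or failure path recycles it directly. Hedged sketch of the pass path, with off/len as the program left them in the xdp_buff:

	static struct sk_buff *example_pass(struct page *page,
					    unsigned int off, unsigned int len)
	{
		struct sk_buff *skb = build_skb(page_address(page), PAGE_SIZE);

		if (!skb)
			return NULL;
		skb_reserve(skb, off);
		skb_put(skb, len);
		skb_mark_for_recycle(skb);	/* free -> page_pool, not page allocator */
		return skb;
	}
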
+
+/* Handle packets from DataRing. */
+int
+vmxnet3_process_xdp_small(struct vmxnet3_adapter *adapter,
+			  struct vmxnet3_rx_queue *rq,
+			  void *data, int len,
+			  struct sk_buff **skb_xdp_pass)
+{
+	struct bpf_prog *xdp_prog;
+	struct xdp_buff xdp;
+	struct page *page;
+	int act;
+
+	page = page_pool_alloc_pages(rq->page_pool, GFP_ATOMIC);
+	if (unlikely(!page)) {
+		rq->stats.rx_buf_alloc_failure++;
+		return XDP_DROP;
+	}
+
+	xdp_init_buff(&xdp, PAGE_SIZE, &rq->xdp_rxq);
+	xdp_prepare_buff(&xdp, page_address(page), rq->page_pool->p.offset,
+			 len, false);
+	xdp_buff_clear_frags_flag(&xdp);
+
+	/* Must copy the data because it lives in the data ring. */
+	memcpy(xdp.data, data, len);
+
+	xdp_prog = rcu_dereference(rq->adapter->xdp_bpf_prog);
+	if (!xdp_prog) {
+		act = XDP_PASS;
+		goto out_skb;
+	}
+	act = vmxnet3_run_xdp(rq, &xdp, xdp_prog);
+	if (act != XDP_PASS)
+		return act;
+
+out_skb:
+	*skb_xdp_pass = vmxnet3_build_skb(rq, page, &xdp);
+	if (!*skb_xdp_pass)
+		return XDP_DROP;
+
+	/* No need to refill. */
+	return act;
+}
+
+int
+vmxnet3_process_xdp(struct vmxnet3_adapter *adapter,
+		    struct vmxnet3_rx_queue *rq,
+		    struct Vmxnet3_RxCompDesc *rcd,
+		    struct vmxnet3_rx_buf_info *rbi,
+		    struct Vmxnet3_RxDesc *rxd,
+		    struct sk_buff **skb_xdp_pass)
+{
+	struct bpf_prog *xdp_prog;
+	dma_addr_t new_dma_addr;
+	struct xdp_buff xdp;
+	struct page *page;
+	void *new_data;
+	int act;
+
+	page = rbi->page;
+	dma_sync_single_for_cpu(&adapter->pdev->dev,
+				page_pool_get_dma_addr(page) +
+				rq->page_pool->p.offset, rcd->len,
+				page_pool_get_dma_dir(rq->page_pool));
+
+	xdp_init_buff(&xdp, rbi->len, &rq->xdp_rxq);
+	xdp_prepare_buff(&xdp, page_address(page), rq->page_pool->p.offset,
+			 rcd->len, false);
+	xdp_buff_clear_frags_flag(&xdp);
+
+	xdp_prog = rcu_dereference(rq->adapter->xdp_bpf_prog);
+	if (!xdp_prog) {
+		act = XDP_PASS;
+		goto out_skb;
+	}
+	act = vmxnet3_run_xdp(rq, &xdp, xdp_prog);
+
+	if (act == XDP_PASS) {
+out_skb:
+		*skb_xdp_pass = vmxnet3_build_skb(rq, page, &xdp);
+		if (!*skb_xdp_pass)
+			act = XDP_DROP;
+	}
+
+	new_data = vmxnet3_pp_get_buff(rq->page_pool, &new_dma_addr,
+				       GFP_ATOMIC);
+	if (!new_data) {
+		rq->stats.rx_buf_alloc_failure++;
+		return XDP_DROP;
+	}
+	rbi->page = virt_to_page(new_data);
+	rbi->dma_addr = new_dma_addr;
+	rxd->addr = cpu_to_le64(rbi->dma_addr);
+	rxd->len = rbi->len;
+
+	return act;
+}
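
Note that vmxnet3_process_xdp_small() cannot run the program in place: small packets sit in the device's data ring, coherent DMA memory with neither XDP headroom nor tailroom, so the bytes are first staged into a freshly allocated page_pool page. Hedged sketch of that staging step (pp_offset is the pool's configured headroom):

	static void example_stage(struct xdp_buff *xdp, struct page *page,
				  u32 pp_offset, const void *src, int len,
				  struct xdp_rxq_info *rxq)
	{
		xdp_init_buff(xdp, PAGE_SIZE, rxq);
		xdp_prepare_buff(xdp, page_address(page), pp_offset, len, false);
		memcpy(xdp->data, src, len);	/* dataring -> pool page */
	}
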
diff --git a/drivers/net/vmxnet3/vmxnet3_xdp.h b/drivers/net/vmxnet3/vmxnet3_xdp.h
new file mode 100644
index 000000000000..f9d843e060a3
--- /dev/null
+++ b/drivers/net/vmxnet3/vmxnet3_xdp.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Linux driver for VMware's vmxnet3 ethernet NIC.
+ * Copyright (C) 2008-2023, VMware, Inc. All Rights Reserved.
+ * Maintained by: pv-drivers@vmware.com
+ *
+ */
+
+#ifndef _VMXNET3_XDP_H
+#define _VMXNET3_XDP_H
+
+#include <linux/filter.h>
+#include <linux/bpf_trace.h>
+#include <linux/netlink.h>
+
+#include "vmxnet3_int.h"
+
+#define VMXNET3_XDP_HEADROOM	(XDP_PACKET_HEADROOM + NET_IP_ALIGN)
+#define VMXNET3_XDP_RX_TAILROOM	SKB_DATA_ALIGN(sizeof(struct skb_shared_info))
+#define VMXNET3_XDP_RX_OFFSET	VMXNET3_XDP_HEADROOM
+#define VMXNET3_XDP_MAX_FRSIZE	(PAGE_SIZE - VMXNET3_XDP_HEADROOM - \
+				 VMXNET3_XDP_RX_TAILROOM)
+#define VMXNET3_XDP_MAX_MTU	(VMXNET3_XDP_MAX_FRSIZE - ETH_HLEN - \
+				 2 * VLAN_HLEN - ETH_FCS_LEN)
+
+int vmxnet3_xdp(struct net_device *netdev, struct netdev_bpf *bpf);
+int vmxnet3_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+		     u32 flags);
+int vmxnet3_process_xdp(struct vmxnet3_adapter *adapter,
+			struct vmxnet3_rx_queue *rq,
+			struct Vmxnet3_RxCompDesc *rcd,
+			struct vmxnet3_rx_buf_info *rbi,
+			struct Vmxnet3_RxDesc *rxd,
+			struct sk_buff **skb_xdp_pass);
+int vmxnet3_process_xdp_small(struct vmxnet3_adapter *adapter,
+			      struct vmxnet3_rx_queue *rq,
+			      void *data, int len,
+			      struct sk_buff **skb_xdp_pass);
+void *vmxnet3_pp_get_buff(struct page_pool *pp, dma_addr_t *dma_addr,
+			  gfp_t gfp_mask);
+
+static inline bool vmxnet3_xdp_enabled(struct vmxnet3_adapter *adapter)
+{
+	return !!rcu_access_pointer(adapter->xdp_bpf_prog);
+}
+
+#endif
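
The MTU bound falls straight out of the buffer layout above. Worked numbers for a common x86-64 configuration (4096-byte pages, XDP_PACKET_HEADROOM = 256, NET_IP_ALIGN = 0, and a cache-aligned skb_shared_info of roughly 320 bytes; all of these vary by config, so treat the figures as illustrative):

	VMXNET3_XDP_MAX_FRSIZE = 4096 - 256 - 320       = 3520
	VMXNET3_XDP_MAX_MTU    = 3520 - 14 - 2*4 - 4    = 3494

A standard 1500-byte MTU fits comfortably; jumbo-frame setups must lower the MTU before attaching a program, which is exactly what the check in vmxnet3_xdp_set() enforces.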