From 66ce8e6b49df401854f0c98bed50a65e4167825b Mon Sep 17 00:00:00 2001 From: Rushil Gupta Date: Fri, 4 Aug 2023 21:34:41 +0000 Subject: [PATCH 1/4] gve: Control path for DQO-QPL GVE supports QPL ("queue-page-list") mode where all data is communicated through a set of pre-registered pages. Adding this mode to DQO descriptor format. Add checks, abi-changes and device options to support QPL mode for DQO in addition to GQI. Also, use pages-per-qpl supplied by device-option to control the size of the "queue-page-list". Signed-off-by: Rushil Gupta Reviewed-by: Willem de Bruijn Signed-off-by: Praveen Kaligineedi Signed-off-by: Bailey Forrest Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve.h | 29 ++++++- drivers/net/ethernet/google/gve/gve_adminq.c | 89 +++++++++++++++++--- drivers/net/ethernet/google/gve/gve_adminq.h | 10 +++ drivers/net/ethernet/google/gve/gve_main.c | 20 +++-- 4 files changed, 128 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index a31256f70348..3bd6eed7280f 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -52,6 +52,12 @@ #define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182 +#define DQO_QPL_DEFAULT_TX_PAGES 512 +#define DQO_QPL_DEFAULT_RX_PAGES 2048 + +/* Maximum TSO size supported on DQO */ +#define GVE_DQO_TX_MAX 0x3FFFF + /* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */ struct gve_rx_desc_queue { struct gve_rx_desc *desc_ring; /* the descriptor ring */ @@ -218,6 +224,9 @@ struct gve_rx_ring { * which cannot be reused yet. */ struct gve_index_list used_buf_states; + + /* qpl assigned to this queue */ + struct gve_queue_page_list *qpl; } dqo; }; @@ -454,6 +463,12 @@ struct gve_tx_ring { s16 num_pending_packets; u32 complq_mask; /* complq size is complq_mask + 1 */ + + /* QPL fields */ + struct { + /* qpl assigned to this queue */ + struct gve_queue_page_list *qpl; + }; } dqo; } ____cacheline_aligned; struct netdev_queue *netdev_txq; @@ -532,6 +547,7 @@ enum gve_queue_format { GVE_GQI_RDA_FORMAT = 0x1, GVE_GQI_QPL_FORMAT = 0x2, GVE_DQO_RDA_FORMAT = 0x3, + GVE_DQO_QPL_FORMAT = 0x4, }; struct gve_priv { @@ -551,7 +567,8 @@ struct gve_priv { u16 num_event_counters; u16 tx_desc_cnt; /* num desc per ring */ u16 rx_desc_cnt; /* num desc per ring */ - u16 tx_pages_per_qpl; /* tx buffer length */ + u16 tx_pages_per_qpl; /* Suggested number of pages per qpl for TX queues by NIC */ + u16 rx_pages_per_qpl; /* Suggested number of pages per qpl for RX queues by NIC */ u16 rx_data_slot_cnt; /* rx buffer length */ u64 max_registered_pages; u64 num_registered_pages; /* num pages registered with NIC */ @@ -809,11 +826,17 @@ static inline u32 gve_rx_idx_to_ntfy(struct gve_priv *priv, u32 queue_idx) return (priv->num_ntfy_blks / 2) + queue_idx; } +static inline bool gve_is_qpl(struct gve_priv *priv) +{ + return priv->queue_format == GVE_GQI_QPL_FORMAT || + priv->queue_format == GVE_DQO_QPL_FORMAT; +} + /* Returns the number of tx queue page lists */ static inline u32 gve_num_tx_qpls(struct gve_priv *priv) { - if (priv->queue_format != GVE_GQI_QPL_FORMAT) + if (!gve_is_qpl(priv)) return 0; return priv->tx_cfg.num_queues + priv->num_xdp_queues; @@ -833,7 +856,7 @@ static inline u32 gve_num_xdp_qpls(struct gve_priv *priv) */ static inline u32 gve_num_rx_qpls(struct gve_priv *priv) { - if (priv->queue_format != GVE_GQI_QPL_FORMAT) + if (!gve_is_qpl(priv)) return 0; return priv->rx_cfg.num_queues; diff --git 
a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index 252974202a3f..79db7a6d42bc 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -39,7 +39,8 @@ void gve_parse_device_option(struct gve_priv *priv, struct gve_device_option_gqi_rda **dev_op_gqi_rda, struct gve_device_option_gqi_qpl **dev_op_gqi_qpl, struct gve_device_option_dqo_rda **dev_op_dqo_rda, - struct gve_device_option_jumbo_frames **dev_op_jumbo_frames) + struct gve_device_option_jumbo_frames **dev_op_jumbo_frames, + struct gve_device_option_dqo_qpl **dev_op_dqo_qpl) { u32 req_feat_mask = be32_to_cpu(option->required_features_mask); u16 option_length = be16_to_cpu(option->option_length); @@ -112,6 +113,22 @@ void gve_parse_device_option(struct gve_priv *priv, } *dev_op_dqo_rda = (void *)(option + 1); break; + case GVE_DEV_OPT_ID_DQO_QPL: + if (option_length < sizeof(**dev_op_dqo_qpl) || + req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL) { + dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT, + "DQO QPL", (int)sizeof(**dev_op_dqo_qpl), + GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL, + option_length, req_feat_mask); + break; + } + + if (option_length > sizeof(**dev_op_dqo_qpl)) { + dev_warn(&priv->pdev->dev, + GVE_DEVICE_OPTION_TOO_BIG_FMT, "DQO QPL"); + } + *dev_op_dqo_qpl = (void *)(option + 1); + break; case GVE_DEV_OPT_ID_JUMBO_FRAMES: if (option_length < sizeof(**dev_op_jumbo_frames) || req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES) { @@ -146,7 +163,8 @@ gve_process_device_options(struct gve_priv *priv, struct gve_device_option_gqi_rda **dev_op_gqi_rda, struct gve_device_option_gqi_qpl **dev_op_gqi_qpl, struct gve_device_option_dqo_rda **dev_op_dqo_rda, - struct gve_device_option_jumbo_frames **dev_op_jumbo_frames) + struct gve_device_option_jumbo_frames **dev_op_jumbo_frames, + struct gve_device_option_dqo_qpl **dev_op_dqo_qpl) { const int num_options = be16_to_cpu(descriptor->num_device_options); struct gve_device_option *dev_opt; @@ -166,7 +184,8 @@ gve_process_device_options(struct gve_priv *priv, gve_parse_device_option(priv, descriptor, dev_opt, dev_op_gqi_rda, dev_op_gqi_qpl, - dev_op_dqo_rda, dev_op_jumbo_frames); + dev_op_dqo_rda, dev_op_jumbo_frames, + dev_op_dqo_qpl); dev_opt = next_opt; } @@ -505,12 +524,24 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index) cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id); } else { + u16 comp_ring_size; + u32 qpl_id = 0; + + if (priv->queue_format == GVE_DQO_RDA_FORMAT) { + qpl_id = GVE_RAW_ADDRESSING_QPL_ID; + comp_ring_size = + priv->options_dqo_rda.tx_comp_ring_entries; + } else { + qpl_id = tx->dqo.qpl->id; + comp_ring_size = priv->tx_desc_cnt; + } + cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id); cmd.create_tx_queue.tx_ring_size = cpu_to_be16(priv->tx_desc_cnt); cmd.create_tx_queue.tx_comp_ring_addr = cpu_to_be64(tx->complq_bus_dqo); cmd.create_tx_queue.tx_comp_ring_size = - cpu_to_be16(priv->options_dqo_rda.tx_comp_ring_entries); + cpu_to_be16(comp_ring_size); } return gve_adminq_issue_cmd(priv, &cmd); @@ -555,6 +586,18 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index) cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id); cmd.create_rx_queue.packet_buffer_size = cpu_to_be16(rx->packet_buffer_size); } else { + u16 rx_buff_ring_entries; + u32 qpl_id = 0; + + if (priv->queue_format == GVE_DQO_RDA_FORMAT) { + qpl_id = GVE_RAW_ADDRESSING_QPL_ID; + rx_buff_ring_entries = + 
priv->options_dqo_rda.rx_buff_ring_entries; + } else { + qpl_id = rx->dqo.qpl->id; + rx_buff_ring_entries = priv->rx_desc_cnt; + } + cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id); cmd.create_rx_queue.rx_ring_size = cpu_to_be16(priv->rx_desc_cnt); cmd.create_rx_queue.rx_desc_ring_addr = @@ -564,7 +607,7 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index) cmd.create_rx_queue.packet_buffer_size = cpu_to_be16(priv->data_buffer_size_dqo); cmd.create_rx_queue.rx_buff_ring_size = - cpu_to_be16(priv->options_dqo_rda.rx_buff_ring_entries); + cpu_to_be16(rx_buff_ring_entries); cmd.create_rx_queue.enable_rsc = !!(priv->dev->features & NETIF_F_LRO); } @@ -675,9 +718,13 @@ gve_set_desc_cnt_dqo(struct gve_priv *priv, const struct gve_device_option_dqo_rda *dev_op_dqo_rda) { priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries); + priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries); + + if (priv->queue_format == GVE_DQO_QPL_FORMAT) + return 0; + priv->options_dqo_rda.tx_comp_ring_entries = be16_to_cpu(dev_op_dqo_rda->tx_comp_ring_entries); - priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries); priv->options_dqo_rda.rx_buff_ring_entries = be16_to_cpu(dev_op_dqo_rda->rx_buff_ring_entries); @@ -687,7 +734,9 @@ gve_set_desc_cnt_dqo(struct gve_priv *priv, static void gve_enable_supported_features(struct gve_priv *priv, u32 supported_features_mask, const struct gve_device_option_jumbo_frames - *dev_op_jumbo_frames) + *dev_op_jumbo_frames, + const struct gve_device_option_dqo_qpl + *dev_op_dqo_qpl) { /* Before control reaches this point, the page-size-capped max MTU from * the gve_device_descriptor field has already been stored in @@ -699,6 +748,18 @@ static void gve_enable_supported_features(struct gve_priv *priv, "JUMBO FRAMES device option enabled.\n"); priv->dev->max_mtu = be16_to_cpu(dev_op_jumbo_frames->max_mtu); } + + /* Override pages for qpl for DQO-QPL */ + if (dev_op_dqo_qpl) { + priv->tx_pages_per_qpl = + be16_to_cpu(dev_op_dqo_qpl->tx_pages_per_qpl); + priv->rx_pages_per_qpl = + be16_to_cpu(dev_op_dqo_qpl->rx_pages_per_qpl); + if (priv->tx_pages_per_qpl == 0) + priv->tx_pages_per_qpl = DQO_QPL_DEFAULT_TX_PAGES; + if (priv->rx_pages_per_qpl == 0) + priv->rx_pages_per_qpl = DQO_QPL_DEFAULT_RX_PAGES; + } } int gve_adminq_describe_device(struct gve_priv *priv) @@ -707,6 +768,7 @@ int gve_adminq_describe_device(struct gve_priv *priv) struct gve_device_option_gqi_rda *dev_op_gqi_rda = NULL; struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL; struct gve_device_option_dqo_rda *dev_op_dqo_rda = NULL; + struct gve_device_option_dqo_qpl *dev_op_dqo_qpl = NULL; struct gve_device_descriptor *descriptor; u32 supported_features_mask = 0; union gve_adminq_command cmd; @@ -733,13 +795,14 @@ int gve_adminq_describe_device(struct gve_priv *priv) err = gve_process_device_options(priv, descriptor, &dev_op_gqi_rda, &dev_op_gqi_qpl, &dev_op_dqo_rda, - &dev_op_jumbo_frames); + &dev_op_jumbo_frames, + &dev_op_dqo_qpl); if (err) goto free_device_descriptor; /* If the GQI_RAW_ADDRESSING option is not enabled and the queue format * is not set to GqiRda, choose the queue format in a priority order: - * DqoRda, GqiRda, GqiQpl. Use GqiQpl as default. + * DqoRda, DqoQpl, GqiRda, GqiQpl. Use GqiQpl as default. 
*/ if (dev_op_dqo_rda) { priv->queue_format = GVE_DQO_RDA_FORMAT; @@ -747,7 +810,11 @@ int gve_adminq_describe_device(struct gve_priv *priv) "Driver is running with DQO RDA queue format.\n"); supported_features_mask = be32_to_cpu(dev_op_dqo_rda->supported_features_mask); - } else if (dev_op_gqi_rda) { + } else if (dev_op_dqo_qpl) { + priv->queue_format = GVE_DQO_QPL_FORMAT; + supported_features_mask = + be32_to_cpu(dev_op_dqo_qpl->supported_features_mask); + } else if (dev_op_gqi_rda) { priv->queue_format = GVE_GQI_RDA_FORMAT; dev_info(&priv->pdev->dev, "Driver is running with GQI RDA queue format.\n"); @@ -798,7 +865,7 @@ int gve_adminq_describe_device(struct gve_priv *priv) priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues); gve_enable_supported_features(priv, supported_features_mask, - dev_op_jumbo_frames); + dev_op_jumbo_frames, dev_op_dqo_qpl); free_device_descriptor: dma_free_coherent(&priv->pdev->dev, PAGE_SIZE, descriptor, diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h index f894beb3deaf..38a22279e863 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.h +++ b/drivers/net/ethernet/google/gve/gve_adminq.h @@ -109,6 +109,14 @@ struct gve_device_option_dqo_rda { static_assert(sizeof(struct gve_device_option_dqo_rda) == 8); +struct gve_device_option_dqo_qpl { + __be32 supported_features_mask; + __be16 tx_pages_per_qpl; + __be16 rx_pages_per_qpl; +}; + +static_assert(sizeof(struct gve_device_option_dqo_qpl) == 8); + struct gve_device_option_jumbo_frames { __be32 supported_features_mask; __be16 max_mtu; @@ -130,6 +138,7 @@ enum gve_dev_opt_id { GVE_DEV_OPT_ID_GQI_RDA = 0x2, GVE_DEV_OPT_ID_GQI_QPL = 0x3, GVE_DEV_OPT_ID_DQO_RDA = 0x4, + GVE_DEV_OPT_ID_DQO_QPL = 0x7, GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8, }; @@ -139,6 +148,7 @@ enum gve_dev_opt_req_feat_mask { GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0, GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0, GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0, }; enum gve_sup_feature_mask { diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index e6f1711d9be0..5704b5f57cd0 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -31,7 +31,6 @@ // Minimum amount of time between queue kicks in msec (10 seconds) #define MIN_TX_TIMEOUT_GAP (1000 * 10) -#define DQO_TX_MAX 0x3FFFF char gve_driver_name[] = "gve"; const char gve_version_str[] = GVE_VERSION; @@ -494,7 +493,7 @@ static int gve_setup_device_resources(struct gve_priv *priv) goto abort_with_stats_report; } - if (priv->queue_format == GVE_DQO_RDA_FORMAT) { + if (!gve_is_gqi(priv)) { priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo), GFP_KERNEL); if (!priv->ptype_lut_dqo) { @@ -1083,11 +1082,12 @@ free_qpls: static int gve_alloc_qpls(struct gve_priv *priv) { int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues; + int page_count; int start_id; int i, j; int err; - if (priv->queue_format != GVE_GQI_QPL_FORMAT) + if (!gve_is_qpl(priv)) return 0; priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL); @@ -1095,17 +1095,25 @@ static int gve_alloc_qpls(struct gve_priv *priv) return -ENOMEM; start_id = gve_tx_start_qpl_id(priv); + page_count = priv->tx_pages_per_qpl; for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) { err = gve_alloc_queue_page_list(priv, i, - priv->tx_pages_per_qpl); + page_count); if (err) goto free_qpls; } start_id = 
gve_rx_start_qpl_id(priv); + + /* For GQI_QPL, the number of pages allocated has a 1:1 relationship + * with the number of descriptors. For DQO, more pages are required + * than descriptors (because of out-of-order completions). + */ + page_count = priv->queue_format == GVE_GQI_QPL_FORMAT ? + priv->rx_data_slot_cnt : priv->rx_pages_per_qpl; for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) { err = gve_alloc_queue_page_list(priv, i, - priv->rx_data_slot_cnt); + page_count); if (err) goto free_qpls; } @@ -2051,7 +2059,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) /* Big TCP is only supported on DQ*/ if (!gve_is_gqi(priv)) - netif_set_tso_max_size(priv->dev, DQO_TX_MAX); + netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX); priv->num_registered_pages = 0; priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
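For a rough sense of scale, the sketch below (illustrative only, not part of the patch; it assumes the common 4 KiB PAGE_SIZE) computes the per-queue QPL footprint implied by the DQO_QPL_DEFAULT_* fallbacks added in this patch:

/* Illustrative only: per-queue QPL footprint for DQO-QPL, assuming
 * 4 KiB pages and the fallback page counts defined in gve.h above.
 */
#include <stdio.h>

#define PAGE_SIZE 4096
#define DQO_QPL_DEFAULT_TX_PAGES 512
#define DQO_QPL_DEFAULT_RX_PAGES 2048

int main(void)
{
	/* 512 pages * 4 KiB = 2 MiB per TX queue */
	printf("TX QPL: %lu KiB\n",
	       (unsigned long)DQO_QPL_DEFAULT_TX_PAGES * PAGE_SIZE / 1024);
	/* 2048 pages * 4 KiB = 8 MiB per RX queue */
	printf("RX QPL: %lu KiB\n",
	       (unsigned long)DQO_QPL_DEFAULT_RX_PAGES * PAGE_SIZE / 1024);
	return 0;
}

These are only the fallbacks; as the patch notes, the device may override both values through the tx_pages_per_qpl and rx_pages_per_qpl fields of the DQO QPL device option.

From a6fb8d5a8b6925f1e635818d3dd2d89531d4a058 Mon Sep 17 00:00:00 2001 From: Rushil Gupta Date: Fri, 4 Aug 2023 21:34:42 +0000 Subject: [PATCH 2/4] gve: Tx path for DQO-QPL Each QPL page is divided into GVE_TX_BUFS_PER_PAGE_DQO buffers. When a packet needs to be transmitted, we break the packet into chunks of at most GVE_TX_BUF_SIZE_DQO bytes and transmit each chunk using a TX descriptor. We allocate the TX buffers from the free list in dqo_tx. We store these TX buffer indices in an array in the pending_packet structure. The TX buffers are returned to the free list in dqo_compl after receiving the packet completion or when removing packets from the miss completions list. Signed-off-by: Rushil Gupta Reviewed-by: Willem de Bruijn Signed-off-by: Praveen Kaligineedi Signed-off-by: Bailey Forrest Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve.h | 77 +++- drivers/net/ethernet/google/gve/gve_tx_dqo.c | 410 +++++++++++++++---- 2 files changed, 401 insertions(+), 86 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 3bd6eed7280f..2042f79cea1f 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -58,6 +58,20 @@ /* Maximum TSO size supported on DQO */ #define GVE_DQO_TX_MAX 0x3FFFF +#define GVE_TX_BUF_SHIFT_DQO 11 + +/* 2K buffers for DQO-QPL */ +#define GVE_TX_BUF_SIZE_DQO BIT(GVE_TX_BUF_SHIFT_DQO) +#define GVE_TX_BUFS_PER_PAGE_DQO (PAGE_SIZE >> GVE_TX_BUF_SHIFT_DQO) +#define GVE_MAX_TX_BUFS_PER_PKT (DIV_ROUND_UP(GVE_DQO_TX_MAX, GVE_TX_BUF_SIZE_DQO)) + +/* If the number of free/recyclable buffers is below this threshold, the + * driver allocs and uses a non-qpl page on the receive path of DQO QPL to + * free up buffers. + * The value is set large enough to post at least 3 64K LRO packets to the + * NIC via 2K buffers. + */ +#define GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD 96 + /* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */ struct gve_rx_desc_queue { struct gve_rx_desc *desc_ring; /* the descriptor ring */ @@ -338,8 +352,14 @@ struct gve_tx_pending_packet_dqo { * All others correspond to `skb`'s frags and should be unmapped with * `dma_unmap_page`. */ - DEFINE_DMA_UNMAP_ADDR(dma[MAX_SKB_FRAGS + 1]); - DEFINE_DMA_UNMAP_LEN(len[MAX_SKB_FRAGS + 1]); + union { + struct { + DEFINE_DMA_UNMAP_ADDR(dma[MAX_SKB_FRAGS + 1]); + DEFINE_DMA_UNMAP_LEN(len[MAX_SKB_FRAGS + 1]); + }; + s16 tx_qpl_buf_ids[GVE_MAX_TX_BUFS_PER_PKT]; + }; + u16 num_bufs; /* Linked list index to next element in the list, or -1 if none */ @@ -394,6 +414,32 @@ struct gve_tx_ring { * set.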
*/ u32 last_re_idx; + + /* free running number of packet buf descriptors posted */ + u16 posted_packet_desc_cnt; + /* free running number of packet buf descriptors completed */ + u16 completed_packet_desc_cnt; + + /* QPL fields */ + struct { + /* Linked list of gve_tx_buf_dqo. Index into + * tx_qpl_buf_next, or -1 if empty. + * + * This is a consumer list owned by the TX path. When it + * runs out, the producer list is stolen from the + * completion handling path + * (dqo_compl.free_tx_qpl_buf_head). + */ + s16 free_tx_qpl_buf_head; + + /* Free running count of the number of QPL tx buffers + * allocated + */ + u32 alloc_tx_qpl_buf_cnt; + + /* Cached value of `dqo_compl.free_tx_qpl_buf_cnt` */ + u32 free_tx_qpl_buf_cnt; + }; } dqo_tx; }; @@ -437,6 +483,24 @@ struct gve_tx_ring { * reached a specified timeout. */ struct gve_index_list timed_out_completions; + + /* QPL fields */ + struct { + /* Linked list of gve_tx_buf_dqo. Index into + * tx_qpl_buf_next, or -1 if empty. + * + * This is the producer list, owned by the completion + * handling path. When the consumer list + * (dqo_tx.free_tx_qpl_buf_head) runs out, this list + * will be stolen. + */ + atomic_t free_tx_qpl_buf_head; + + /* Free running count of the number of tx buffers + * freed + */ + atomic_t free_tx_qpl_buf_cnt; + }; } dqo_compl; } ____cacheline_aligned; u64 pkt_done; /* free-running - total packets completed */ @@ -468,6 +532,15 @@ struct gve_tx_ring { struct { /* qpl assigned to this queue */ struct gve_queue_page_list *qpl; + + /* Each QPL page is divided into TX bounce buffers + * of size GVE_TX_BUF_SIZE_DQO. tx_qpl_buf_next is + * an array to manage linked lists of TX buffers. + * An entry j at index i implies that the j'th buffer + * is next on the list after buffer i. + */ + s16 *tx_qpl_buf_next; + u32 num_tx_qpl_bufs; }; } dqo; } ____cacheline_aligned;
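The tx_qpl_buf_next array above encodes a classic index-based singly linked free list. A minimal sketch of that encoding on its own (hypothetical names, no concurrency; the driver layers the atomic producer/consumer split shown below on top of this):

/* Illustrative index-encoded free list, as used by tx_qpl_buf_next:
 * next[i] is the index of the buffer after i, or -1 at end-of-list.
 */
#include <stdint.h>

#define NUM_BUFS 8

static int16_t next[NUM_BUFS];
static int16_t free_head;

static void buf_list_init(void)
{
	int i;

	for (i = 0; i < NUM_BUFS - 1; i++)
		next[i] = i + 1;
	next[NUM_BUFS - 1] = -1;
	free_head = 0;
}

/* Pop one buffer index off the free list; returns -1 if empty. */
static int16_t buf_alloc(void)
{
	int16_t index = free_head;

	if (index != -1)
		free_head = next[index];
	return index;
}

/* Push a buffer index back onto the free list. */
static void buf_free(int16_t index)
{
	next[index] = free_head;
	free_head = index;
}

int main(void)
{
	int16_t a, b;

	buf_list_init();
	a = buf_alloc();	/* 0; head advances to 1 */
	b = buf_alloc();	/* 1; head advances to 2 */
	buf_free(a);		/* free list is now 0 -> 2 -> 3 -> ... */
	return !(a == 0 && b == 1);
}

In the patch itself, the pop side lives in dqo_tx (gve_alloc_tx_qpl_buf()) and the push side in dqo_compl (gve_free_tx_qpl_bufs()), with atomic_xchg()/atomic_cmpxchg() standing in for the plain head updates so the two contexts can run concurrently.

diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index 3c09e66ba1ab..1e19b834a613 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -13,6 +13,89 @@ #include #include +/* Returns true if tx_bufs are available. */ +static bool gve_has_free_tx_qpl_bufs(struct gve_tx_ring *tx, int count) +{ + int num_avail; + + if (!tx->dqo.qpl) + return true; + + num_avail = tx->dqo.num_tx_qpl_bufs - + (tx->dqo_tx.alloc_tx_qpl_buf_cnt - + tx->dqo_tx.free_tx_qpl_buf_cnt); + + if (count <= num_avail) + return true; + + /* Update cached value from dqo_compl. */ + tx->dqo_tx.free_tx_qpl_buf_cnt = + atomic_read_acquire(&tx->dqo_compl.free_tx_qpl_buf_cnt); + + num_avail = tx->dqo.num_tx_qpl_bufs - + (tx->dqo_tx.alloc_tx_qpl_buf_cnt - + tx->dqo_tx.free_tx_qpl_buf_cnt); + + return count <= num_avail; +} + +static s16 +gve_alloc_tx_qpl_buf(struct gve_tx_ring *tx) +{ + s16 index; + + index = tx->dqo_tx.free_tx_qpl_buf_head; + + /* No TX buffers available, try to steal the list from the + * completion handler.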
+ */ + if (unlikely(index == -1)) { + tx->dqo_tx.free_tx_qpl_buf_head = + atomic_xchg(&tx->dqo_compl.free_tx_qpl_buf_head, -1); + index = tx->dqo_tx.free_tx_qpl_buf_head; + + if (unlikely(index == -1)) + return index; + } + + /* Remove TX buf from free list */ + tx->dqo_tx.free_tx_qpl_buf_head = tx->dqo.tx_qpl_buf_next[index]; + + return index; +} + +static void +gve_free_tx_qpl_bufs(struct gve_tx_ring *tx, + struct gve_tx_pending_packet_dqo *pkt) +{ + s16 index; + int i; + + if (!pkt->num_bufs) + return; + + index = pkt->tx_qpl_buf_ids[0]; + /* Create a linked list of buffers to be added to the free list */ + for (i = 1; i < pkt->num_bufs; i++) { + tx->dqo.tx_qpl_buf_next[index] = pkt->tx_qpl_buf_ids[i]; + index = pkt->tx_qpl_buf_ids[i]; + } + + while (true) { + s16 old_head = atomic_read_acquire(&tx->dqo_compl.free_tx_qpl_buf_head); + + tx->dqo.tx_qpl_buf_next[index] = old_head; + if (atomic_cmpxchg(&tx->dqo_compl.free_tx_qpl_buf_head, + old_head, + pkt->tx_qpl_buf_ids[0]) == old_head) { + break; + } + } + + atomic_add(pkt->num_bufs, &tx->dqo_compl.free_tx_qpl_buf_cnt); + pkt->num_bufs = 0; +} + /* Returns true if a gve_tx_pending_packet_dqo object is available. */ static bool gve_has_pending_packet(struct gve_tx_ring *tx) { @@ -136,9 +219,40 @@ static void gve_tx_free_ring_dqo(struct gve_priv *priv, int idx) kvfree(tx->dqo.pending_packets); tx->dqo.pending_packets = NULL; + kvfree(tx->dqo.tx_qpl_buf_next); + tx->dqo.tx_qpl_buf_next = NULL; + + if (tx->dqo.qpl) { + gve_unassign_qpl(priv, tx->dqo.qpl->id); + tx->dqo.qpl = NULL; + } + netif_dbg(priv, drv, priv->dev, "freed tx queue %d\n", idx); } +static int gve_tx_qpl_buf_init(struct gve_tx_ring *tx) +{ + int num_tx_qpl_bufs = GVE_TX_BUFS_PER_PAGE_DQO * + tx->dqo.qpl->num_entries; + int i; + + tx->dqo.tx_qpl_buf_next = kvcalloc(num_tx_qpl_bufs, + sizeof(tx->dqo.tx_qpl_buf_next[0]), + GFP_KERNEL); + if (!tx->dqo.tx_qpl_buf_next) + return -ENOMEM; + + tx->dqo.num_tx_qpl_bufs = num_tx_qpl_bufs; + + /* Generate free TX buf list */ + for (i = 0; i < num_tx_qpl_bufs - 1; i++) + tx->dqo.tx_qpl_buf_next[i] = i + 1; + tx->dqo.tx_qpl_buf_next[num_tx_qpl_bufs - 1] = -1; + + atomic_set_release(&tx->dqo_compl.free_tx_qpl_buf_head, -1); + return 0; +} + static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int idx) { struct gve_tx_ring *tx = &priv->tx[idx]; @@ -155,7 +269,9 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int idx) /* Queue sizes must be a power of 2 */ tx->mask = priv->tx_desc_cnt - 1; - tx->dqo.complq_mask = priv->options_dqo_rda.tx_comp_ring_entries - 1; + tx->dqo.complq_mask = priv->queue_format == GVE_DQO_RDA_FORMAT ? + priv->options_dqo_rda.tx_comp_ring_entries - 1 : + tx->mask; /* The max number of pending packets determines the maximum number of * descriptors which maybe written to the completion queue. @@ -211,6 +327,15 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int idx) if (!tx->q_resources) goto err; + if (gve_is_qpl(priv)) { + tx->dqo.qpl = gve_assign_tx_qpl(priv, idx); + if (!tx->dqo.qpl) + goto err; + + if (gve_tx_qpl_buf_init(tx)) + goto err; + } + gve_tx_add_to_block(priv, idx); return 0; @@ -267,20 +392,27 @@ static u32 num_avail_tx_slots(const struct gve_tx_ring *tx) return tx->mask - num_used; } +static bool gve_has_avail_slots_tx_dqo(struct gve_tx_ring *tx, + int desc_count, int buf_count) +{ + return gve_has_pending_packet(tx) && + num_avail_tx_slots(tx) >= desc_count && + gve_has_free_tx_qpl_bufs(tx, buf_count); +} + /* Stops the queue if available descriptors is less than 'count'. 
* Return: 0 if stop is not required. */ -static int gve_maybe_stop_tx_dqo(struct gve_tx_ring *tx, int count) +static int gve_maybe_stop_tx_dqo(struct gve_tx_ring *tx, + int desc_count, int buf_count) { - if (likely(gve_has_pending_packet(tx) && - num_avail_tx_slots(tx) >= count)) + if (likely(gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count))) return 0; /* Update cached TX head pointer */ tx->dqo_tx.head = atomic_read_acquire(&tx->dqo_compl.hw_tx_head); - if (likely(gve_has_pending_packet(tx) && - num_avail_tx_slots(tx) >= count)) + if (likely(gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count))) return 0; /* No space, so stop the queue */ @@ -295,8 +427,7 @@ static int gve_maybe_stop_tx_dqo(struct gve_tx_ring *tx, int count) */ tx->dqo_tx.head = atomic_read_acquire(&tx->dqo_compl.hw_tx_head); - if (likely(!gve_has_pending_packet(tx) || - num_avail_tx_slots(tx) < count)) + if (likely(!gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count))) return -EBUSY; netif_tx_start_queue(tx->netdev_txq); @@ -444,44 +575,16 @@ gve_tx_fill_general_ctx_desc(struct gve_tx_general_context_desc_dqo *desc, }; } -/* Returns 0 on success, or < 0 on error. - * - * Before this function is called, the caller must ensure - * gve_has_pending_packet(tx) returns true. - */ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx, - struct sk_buff *skb) + struct sk_buff *skb, + struct gve_tx_pending_packet_dqo *pkt, + s16 completion_tag, + u32 *desc_idx, + bool is_gso) { const struct skb_shared_info *shinfo = skb_shinfo(skb); - const bool is_gso = skb_is_gso(skb); - u32 desc_idx = tx->dqo_tx.tail; - - struct gve_tx_pending_packet_dqo *pkt; - struct gve_tx_metadata_dqo metadata; - s16 completion_tag; int i; - pkt = gve_alloc_pending_packet(tx); - pkt->skb = skb; - pkt->num_bufs = 0; - completion_tag = pkt - tx->dqo.pending_packets; - - gve_extract_tx_metadata_dqo(skb, &metadata); - if (is_gso) { - int header_len = gve_prep_tso(skb); - - if (unlikely(header_len < 0)) - goto err; - - gve_tx_fill_tso_ctx_desc(&tx->dqo.tx_ring[desc_idx].tso_ctx, - skb, &metadata, header_len); - desc_idx = (desc_idx + 1) & tx->mask; - } - - gve_tx_fill_general_ctx_desc(&tx->dqo.tx_ring[desc_idx].general_ctx, - &metadata); - desc_idx = (desc_idx + 1) & tx->mask; - /* Note: HW requires that the size of a non-TSO packet be within the * range of [17, 9728]. * @@ -490,6 +593,7 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx, * - Hypervisor won't allow MTU larger than 9216. 
*/ + pkt->num_bufs = 0; /* Map the linear portion of skb */ { u32 len = skb_headlen(skb); @@ -503,7 +607,7 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx, dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr); ++pkt->num_bufs; - gve_tx_fill_pkt_desc_dqo(tx, &desc_idx, skb, len, addr, + gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, len, addr, completion_tag, /*eop=*/shinfo->nr_frags == 0, is_gso); } @@ -522,10 +626,139 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx, dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr); ++pkt->num_bufs; - gve_tx_fill_pkt_desc_dqo(tx, &desc_idx, skb, len, addr, + gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, len, addr, completion_tag, is_eop, is_gso); } + return 0; +err: + for (i = 0; i < pkt->num_bufs; i++) { + if (i == 0) { + dma_unmap_single(tx->dev, + dma_unmap_addr(pkt, dma[i]), + dma_unmap_len(pkt, len[i]), + DMA_TO_DEVICE); + } else { + dma_unmap_page(tx->dev, + dma_unmap_addr(pkt, dma[i]), + dma_unmap_len(pkt, len[i]), + DMA_TO_DEVICE); + } + } + pkt->num_bufs = 0; + return -1; +} + +/* Tx buffer i corresponds to + * qpl_page_id = i / GVE_TX_BUFS_PER_PAGE_DQO + * qpl_page_offset = (i % GVE_TX_BUFS_PER_PAGE_DQO) * GVE_TX_BUF_SIZE_DQO + */ +static void gve_tx_buf_get_addr(struct gve_tx_ring *tx, + s16 index, + void **va, dma_addr_t *dma_addr) +{ + int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO); + int offset = (index & (GVE_TX_BUFS_PER_PAGE_DQO - 1)) << GVE_TX_BUF_SHIFT_DQO; + + *va = page_address(tx->dqo.qpl->pages[page_id]) + offset; + *dma_addr = tx->dqo.qpl->page_buses[page_id] + offset; +} + +static int gve_tx_add_skb_copy_dqo(struct gve_tx_ring *tx, + struct sk_buff *skb, + struct gve_tx_pending_packet_dqo *pkt, + s16 completion_tag, + u32 *desc_idx, + bool is_gso) +{ + u32 copy_offset = 0; + dma_addr_t dma_addr; + u32 copy_len; + s16 index; + void *va; + + /* Break the packet into buffer size chunks */ + pkt->num_bufs = 0; + while (copy_offset < skb->len) { + index = gve_alloc_tx_qpl_buf(tx); + if (unlikely(index == -1)) + goto err; + + gve_tx_buf_get_addr(tx, index, &va, &dma_addr); + copy_len = min_t(u32, GVE_TX_BUF_SIZE_DQO, + skb->len - copy_offset); + skb_copy_bits(skb, copy_offset, va, copy_len); + + copy_offset += copy_len; + dma_sync_single_for_device(tx->dev, dma_addr, + copy_len, DMA_TO_DEVICE); + gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, + copy_len, + dma_addr, + completion_tag, + copy_offset == skb->len, + is_gso); + + pkt->tx_qpl_buf_ids[pkt->num_bufs] = index; + ++tx->dqo_tx.alloc_tx_qpl_buf_cnt; + ++pkt->num_bufs; + } + + return 0; +err: + /* Should not be here if gve_has_free_tx_qpl_bufs() check is correct */ + gve_free_tx_qpl_bufs(tx, pkt); + return -ENOMEM; +} + +/* Returns 0 on success, or < 0 on error. + * + * Before this function is called, the caller must ensure + * gve_has_pending_packet(tx) returns true. 
+ */ +static int gve_tx_add_skb_dqo(struct gve_tx_ring *tx, + struct sk_buff *skb) +{ + const bool is_gso = skb_is_gso(skb); + u32 desc_idx = tx->dqo_tx.tail; + struct gve_tx_pending_packet_dqo *pkt; + struct gve_tx_metadata_dqo metadata; + s16 completion_tag; + + pkt = gve_alloc_pending_packet(tx); + pkt->skb = skb; + completion_tag = pkt - tx->dqo.pending_packets; + + gve_extract_tx_metadata_dqo(skb, &metadata); + if (is_gso) { + int header_len = gve_prep_tso(skb); + + if (unlikely(header_len < 0)) + goto err; + + gve_tx_fill_tso_ctx_desc(&tx->dqo.tx_ring[desc_idx].tso_ctx, + skb, &metadata, header_len); + desc_idx = (desc_idx + 1) & tx->mask; + } + + gve_tx_fill_general_ctx_desc(&tx->dqo.tx_ring[desc_idx].general_ctx, + &metadata); + desc_idx = (desc_idx + 1) & tx->mask; + + if (tx->dqo.qpl) { + if (gve_tx_add_skb_copy_dqo(tx, skb, pkt, + completion_tag, + &desc_idx, is_gso)) + goto err; + } else { + if (gve_tx_add_skb_no_copy_dqo(tx, skb, pkt, + completion_tag, + &desc_idx, is_gso)) + goto err; + } + + tx->dqo_tx.posted_packet_desc_cnt += pkt->num_bufs; + /* Commit the changes to our state */ tx->dqo_tx.tail = desc_idx; @@ -547,22 +780,7 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx, return 0; err: - for (i = 0; i < pkt->num_bufs; i++) { - if (i == 0) { - dma_unmap_single(tx->dev, - dma_unmap_addr(pkt, dma[i]), - dma_unmap_len(pkt, len[i]), - DMA_TO_DEVICE); - } else { - dma_unmap_page(tx->dev, - dma_unmap_addr(pkt, dma[i]), - dma_unmap_len(pkt, len[i]), - DMA_TO_DEVICE); - } - } - pkt->skb = NULL; - pkt->num_bufs = 0; gve_free_pending_packet(tx, pkt); return -1; @@ -636,40 +854,56 @@ static int gve_try_tx_skb(struct gve_priv *priv, struct gve_tx_ring *tx, int num_buffer_descs; int total_num_descs; - if (skb_is_gso(skb)) { - /* If TSO doesn't meet HW requirements, attempt to linearize the - * packet. - */ - if (unlikely(!gve_can_send_tso(skb) && - skb_linearize(skb) < 0)) { - net_err_ratelimited("%s: Failed to transmit TSO packet\n", - priv->dev->name); - goto drop; - } - - if (unlikely(ipv6_hopopt_jumbo_remove(skb))) - goto drop; - - num_buffer_descs = gve_num_buffer_descs_needed(skb); - } else { - num_buffer_descs = gve_num_buffer_descs_needed(skb); - - if (unlikely(num_buffer_descs > GVE_TX_MAX_DATA_DESCS)) { - if (unlikely(skb_linearize(skb) < 0)) + if (tx->dqo.qpl) { + if (skb_is_gso(skb)) + if (unlikely(ipv6_hopopt_jumbo_remove(skb))) goto drop; - num_buffer_descs = 1; + /* We do not need to verify the number of buffers used per + * packet or per segment in case of TSO as with 2K size buffers + * none of the TX packet rules would be violated. + * + * gve_can_send_tso() checks that each TCP segment of gso_size is + * not distributed over more than 9 SKB frags.. + */ + num_buffer_descs = DIV_ROUND_UP(skb->len, GVE_TX_BUF_SIZE_DQO); + } else { + if (skb_is_gso(skb)) { + /* If TSO doesn't meet HW requirements, attempt to linearize the + * packet. + */ + if (unlikely(!gve_can_send_tso(skb) && + skb_linearize(skb) < 0)) { + net_err_ratelimited("%s: Failed to transmit TSO packet\n", + priv->dev->name); + goto drop; + } + + if (unlikely(ipv6_hopopt_jumbo_remove(skb))) + goto drop; + + num_buffer_descs = gve_num_buffer_descs_needed(skb); + } else { + num_buffer_descs = gve_num_buffer_descs_needed(skb); + + if (unlikely(num_buffer_descs > GVE_TX_MAX_DATA_DESCS)) { + if (unlikely(skb_linearize(skb) < 0)) + goto drop; + + num_buffer_descs = 1; + } } } /* Metadata + (optional TSO) + data descriptors. 
*/ total_num_descs = 1 + skb_is_gso(skb) + num_buffer_descs; if (unlikely(gve_maybe_stop_tx_dqo(tx, total_num_descs + - GVE_TX_MIN_DESC_PREVENT_CACHE_OVERLAP))) { + GVE_TX_MIN_DESC_PREVENT_CACHE_OVERLAP, + num_buffer_descs))) { return -1; } - if (unlikely(gve_tx_add_skb_no_copy_dqo(tx, skb) < 0)) + if (unlikely(gve_tx_add_skb_dqo(tx, skb) < 0)) goto drop; netdev_tx_sent_queue(tx->netdev_txq, skb->len); @@ -817,7 +1051,11 @@ static void gve_handle_packet_completion(struct gve_priv *priv, return; } } - gve_unmap_packet(tx->dev, pending_packet); + tx->dqo_tx.completed_packet_desc_cnt += pending_packet->num_bufs; + if (tx->dqo.qpl) + gve_free_tx_qpl_bufs(tx, pending_packet); + else + gve_unmap_packet(tx->dev, pending_packet); *bytes += pending_packet->skb->len; (*pkts)++; @@ -875,12 +1113,16 @@ static void remove_miss_completions(struct gve_priv *priv, remove_from_list(tx, &tx->dqo_compl.miss_completions, pending_packet); - /* Unmap buffers and free skb but do not unallocate packet i.e. + /* Unmap/free TX buffers and free the skb, but do not deallocate the packet, i.e. * the completion tag is not freed to ensure that the driver * can take appropriate action if a corresponding valid * completion is received later. */ - gve_unmap_packet(tx->dev, pending_packet); + if (tx->dqo.qpl) + gve_free_tx_qpl_bufs(tx, pending_packet); + else + gve_unmap_packet(tx->dev, pending_packet); + /* This indicates the packet was dropped. */ dev_kfree_skb_any(pending_packet->skb); pending_packet->skb = NULL;
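The buffer accounting in the hunks above follows directly from the 2 KiB buffer geometry. Here is a standalone sketch (illustrative only; it assumes 4 KiB pages, so GVE_TX_BUFS_PER_PAGE_DQO is 2) of the index-to-page math that gve_tx_buf_get_addr() performs:

/* Illustrative sketch of the DQO-QPL TX buffer index math, assuming
 * 4 KiB pages and 2 KiB buffers (GVE_TX_BUF_SHIFT_DQO == 11).
 */
#include <stdio.h>

#define PAGE_SHIFT 12
#define GVE_TX_BUF_SHIFT_DQO 11
#define GVE_TX_BUFS_PER_PAGE_DQO (1 << (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO))

int main(void)
{
	int index;

	for (index = 0; index < 6; index++) {
		/* Same math as gve_tx_buf_get_addr(): high bits pick the
		 * QPL page, low bits pick the 2 KiB slot within it.
		 */
		int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO);
		int offset = (index & (GVE_TX_BUFS_PER_PAGE_DQO - 1))
			     << GVE_TX_BUF_SHIFT_DQO;

		printf("buf %d -> page %d, offset %d\n", index, page_id, offset);
	}
	return 0;
}

A full 9216-byte packet can thus occupy up to DIV_ROUND_UP(9216, 2048) = 5 buffers, which is why gve_maybe_stop_tx_dqo() now takes a buffer count alongside the descriptor count.

From e7075ab4fb6b39730dfbfbfa3a5505d678f01d2c Mon Sep 17 00:00:00 2001 From: Rushil Gupta Date: Fri, 4 Aug 2023 21:34:43 +0000 Subject: [PATCH 3/4] gve: RX path for DQO-QPL The RX path allocates the QPL page pool at queue creation, and tries to reuse these pages through page recycling. This patch ensures that on refill no non-QPL pages are posted to the device. When the driver is running low on free buffers, an ondemand allocation step kicks in that allocates a non-qpl page for the SKB, freeing up the QPL page in use. gve_try_recycle_buf was moved to gve_rx_append_frags so that the driver does not attempt to mark the buffer as used if a non-qpl page was allocated ondemand. Signed-off-by: Rushil Gupta Reviewed-by: Willem de Bruijn Signed-off-by: Praveen Kaligineedi Signed-off-by: Bailey Forrest Signed-off-by: David S.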
Miller --- drivers/net/ethernet/google/gve/gve.h | 6 + drivers/net/ethernet/google/gve/gve_rx_dqo.c | 126 ++++++++++++++++--- 2 files changed, 114 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 2042f79cea1f..0d1e681be250 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -241,6 +241,12 @@ struct gve_rx_ring { /* qpl assigned to this queue */ struct gve_queue_page_list *qpl; + + /* index into queue page list */ + u32 next_qpl_page_idx; + + /* track number of used buffers */ + u16 used_buf_states_cnt; } dqo; }; diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index e57b73eb70f6..ea0e38b4d9e9 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -22,11 +22,13 @@ static int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs) } static void gve_free_page_dqo(struct gve_priv *priv, - struct gve_rx_buf_state_dqo *bs) + struct gve_rx_buf_state_dqo *bs, + bool free_page) { page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1); - gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr, - DMA_FROM_DEVICE); + if (free_page) + gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr, + DMA_FROM_DEVICE); bs->page_info.page = NULL; } @@ -130,12 +132,20 @@ gve_get_recycled_buf_state(struct gve_rx_ring *rx) */ for (i = 0; i < 5; i++) { buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states); - if (gve_buf_ref_cnt(buf_state) == 0) + if (gve_buf_ref_cnt(buf_state) == 0) { + rx->dqo.used_buf_states_cnt--; return buf_state; + } gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state); } + /* For QPL, we cannot allocate any new buffers and must + * wait for the existing ones to be available. + */ + if (rx->dqo.qpl) + return NULL; + /* If there are no free buf states discard an entry from * `used_buf_states` so it can be used. */ @@ -144,23 +154,39 @@ gve_get_recycled_buf_state(struct gve_rx_ring *rx) if (gve_buf_ref_cnt(buf_state) == 0) return buf_state; - gve_free_page_dqo(rx->gve, buf_state); + gve_free_page_dqo(rx->gve, buf_state, true); gve_free_buf_state(rx, buf_state); } return NULL; } -static int gve_alloc_page_dqo(struct gve_priv *priv, +static int gve_alloc_page_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_state_dqo *buf_state) { - int err; + struct gve_priv *priv = rx->gve; + u32 idx; - err = gve_alloc_page(priv, &priv->pdev->dev, &buf_state->page_info.page, - &buf_state->addr, DMA_FROM_DEVICE, GFP_ATOMIC); - if (err) - return err; + if (!rx->dqo.qpl) { + int err; + err = gve_alloc_page(priv, &priv->pdev->dev, + &buf_state->page_info.page, + &buf_state->addr, + DMA_FROM_DEVICE, GFP_ATOMIC); + if (err) + return err; + } else { + idx = rx->dqo.next_qpl_page_idx; + if (idx >= priv->rx_pages_per_qpl) { + net_err_ratelimited("%s: Out of QPL pages\n", + priv->dev->name); + return -ENOMEM; + } + buf_state->page_info.page = rx->dqo.qpl->pages[idx]; + buf_state->addr = rx->dqo.qpl->page_buses[idx]; + rx->dqo.next_qpl_page_idx++; + } buf_state->page_info.page_offset = 0; buf_state->page_info.page_address = page_address(buf_state->page_info.page); @@ -195,9 +221,13 @@ static void gve_rx_free_ring_dqo(struct gve_priv *priv, int idx) for (i = 0; i < rx->dqo.num_buf_states; i++) { struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i]; - + /* Only free page for RDA. QPL pages are freed in gve_main. 
*/ if (bs->page_info.page) - gve_free_page_dqo(priv, bs); + gve_free_page_dqo(priv, bs, !rx->dqo.qpl); + } + if (rx->dqo.qpl) { + gve_unassign_qpl(priv, rx->dqo.qpl->id); + rx->dqo.qpl = NULL; } if (rx->dqo.bufq.desc_ring) { @@ -229,7 +259,8 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx) int i; const u32 buffer_queue_slots = - priv->options_dqo_rda.rx_buff_ring_entries; + priv->queue_format == GVE_DQO_RDA_FORMAT ? + priv->options_dqo_rda.rx_buff_ring_entries : priv->rx_desc_cnt; const u32 completion_queue_slots = priv->rx_desc_cnt; netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n"); @@ -243,7 +274,9 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx) rx->ctx.skb_head = NULL; rx->ctx.skb_tail = NULL; - rx->dqo.num_buf_states = min_t(s16, S16_MAX, buffer_queue_slots * 4); + rx->dqo.num_buf_states = priv->queue_format == GVE_DQO_RDA_FORMAT ? + min_t(s16, S16_MAX, buffer_queue_slots * 4) : + priv->rx_pages_per_qpl; rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states, sizeof(rx->dqo.buf_states[0]), GFP_KERNEL); @@ -275,6 +308,13 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx) if (!rx->dqo.bufq.desc_ring) goto err; + if (priv->queue_format != GVE_DQO_RDA_FORMAT) { + rx->dqo.qpl = gve_assign_rx_qpl(priv, rx->q_num); + if (!rx->dqo.qpl) + goto err; + rx->dqo.next_qpl_page_idx = 0; + } + rx->q_resources = dma_alloc_coherent(hdev, sizeof(*rx->q_resources), &rx->q_resources_bus, GFP_KERNEL); if (!rx->q_resources) @@ -352,7 +392,7 @@ void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx) if (unlikely(!buf_state)) break; - if (unlikely(gve_alloc_page_dqo(priv, buf_state))) { + if (unlikely(gve_alloc_page_dqo(rx, buf_state))) { u64_stats_update_begin(&rx->statss); rx->rx_buf_alloc_fail++; u64_stats_update_end(&rx->statss); @@ -415,6 +455,7 @@ static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx, mark_used: gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state); + rx->dqo.used_buf_states_cnt++; } static void gve_rx_skb_csum(struct sk_buff *skb, @@ -475,6 +516,43 @@ static void gve_rx_free_skb(struct gve_rx_ring *rx) rx->ctx.skb_tail = NULL; } +static bool gve_rx_should_trigger_copy_ondemand(struct gve_rx_ring *rx) +{ + if (!rx->dqo.qpl) + return false; + if (rx->dqo.used_buf_states_cnt < + (rx->dqo.num_buf_states - + GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD)) + return false; + return true; +} + +static int gve_rx_copy_ondemand(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state, + u16 buf_len) +{ + struct page *page = alloc_page(GFP_ATOMIC); + int num_frags; + + if (!page) + return -ENOMEM; + + memcpy(page_address(page), + buf_state->page_info.page_address + + buf_state->page_info.page_offset, + buf_len); + num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags; + skb_add_rx_frag(rx->ctx.skb_tail, num_frags, page, + 0, buf_len, PAGE_SIZE); + + u64_stats_update_begin(&rx->statss); + rx->rx_frag_alloc_cnt++; + u64_stats_update_end(&rx->statss); + /* Return unused buffer. */ + gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state); + return 0; +} + /* Chains multi skbs for single rx packet. * Returns 0 if buffer is appended, -1 otherwise. 
*/ @@ -502,12 +580,20 @@ static int gve_rx_append_frags(struct napi_struct *napi, rx->ctx.skb_head->truesize += priv->data_buffer_size_dqo; } + /* Trigger ondemand page allocation if we are running low on buffers */ + if (gve_rx_should_trigger_copy_ondemand(rx)) + return gve_rx_copy_ondemand(rx, buf_state, buf_len); + skb_add_rx_frag(rx->ctx.skb_tail, num_frags, buf_state->page_info.page, buf_state->page_info.page_offset, buf_len, priv->data_buffer_size_dqo); gve_dec_pagecnt_bias(&buf_state->page_info); + /* Advances buffer page-offset if page is partially used. + * Marks buffer as used if page is full. + */ + gve_try_recycle_buf(priv, rx, buf_state); return 0; } @@ -561,8 +647,6 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, priv)) != 0) { goto error; } - - gve_try_recycle_buf(priv, rx, buf_state); return 0; } @@ -588,6 +672,12 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, goto error; rx->ctx.skb_tail = rx->ctx.skb_head; + if (gve_rx_should_trigger_copy_ondemand(rx)) { + if (gve_rx_copy_ondemand(rx, buf_state, buf_len) < 0) + goto error; + return 0; + } + skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page, buf_state->page_info.page_offset, buf_len, priv->data_buffer_size_dqo); From 5a3f8d1231073fc5f0b6f38ab8337d424ba0cfe4 Mon Sep 17 00:00:00 2001 From: Rushil Gupta Date: Fri, 4 Aug 2023 21:34:44 +0000 Subject: [PATCH 4/4] gve: update gve.rst Add a note about the QPL and RDA modes Signed-off-by: Rushil Gupta Reviewed-by: Willem de Bruijn Signed-off-by: Praveen Kaligineedi Signed-off-by: Bailey Forrest Signed-off-by: David S. Miller --- .../networking/device_drivers/ethernet/google/gve.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Documentation/networking/device_drivers/ethernet/google/gve.rst b/Documentation/networking/device_drivers/ethernet/google/gve.rst index 6d73ee78f3d7..31d621bca82e 100644 --- a/Documentation/networking/device_drivers/ethernet/google/gve.rst +++ b/Documentation/networking/device_drivers/ethernet/google/gve.rst @@ -52,6 +52,15 @@ Descriptor Formats GVE supports two descriptor formats: GQI and DQO. These two formats have entirely different descriptors, which will be described below. +Addressing Mode +--------------- +GVE supports two addressing modes: QPL and RDA. +QPL ("queue-page-list") mode communicates data through a set of +pre-registered pages. + +For RDA ("raw DMA addressing") mode, the set of pages is dynamic. +Therefore, the packet buffers can be anywhere in guest memory. + Registers --------- All registers are MMIO.
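Tying patch 3 back to the GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD constant from patch 2: the ondemand-copy trigger is just an occupancy check on the fixed pool of buffer states. A condensed sketch of the decision (illustrative only; the names outside the driver are hypothetical):

/* Illustrative only: the gist of gve_rx_should_trigger_copy_ondemand().
 * Assumes num_buf_states >= the threshold, which holds for the default
 * 2048-page RX QPL.
 */
#include <stdbool.h>
#include <stdint.h>

#define GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD 96

struct rx_qpl_state {
	bool has_qpl;			/* queue uses a queue-page-list */
	uint32_t num_buf_states;	/* rx_pages_per_qpl for DQO-QPL */
	uint32_t used_buf_states_cnt;	/* buffers awaiting stack release */
};

static bool should_copy_ondemand(const struct rx_qpl_state *rx)
{
	if (!rx->has_qpl)
		return false;	/* RDA can always post fresh pages */
	/* Copy out once 96 or fewer buffer states remain usable. */
	return rx->used_buf_states_cnt >=
	       rx->num_buf_states - GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD;
}

The threshold of 96 appears to come from the 3 * (64 KiB / 2 KiB) headroom called out in the gve.h comment: enough spare 2 KiB buffers to post three 64K LRO packets while copies drain the used list.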