net/mlx5: Allocating a pool of MSI-X vectors for SFs

SFs (Sub Functions) currently use IRQs from the global IRQ table of their
parent Physical Function. In order to scale better, we need to allocate
more IRQs and share them between different SFs.

The driver will maintain 3 separate IRQ pools:

1. A pool that serves the PF consumers (the PF's netdev and RDMA stacks),
similar to what the driver had before this patch. That is, this pool
shares IRQs between RDMA and netdev and keeps the IRQ indexes and
allocation order. The latter is important for the PF netdev rmap (aRFS).

2. A pool of control IRQs for SFs. The size of this pool is the number
of SFs that can be created divided by SFS_PER_IRQ. This pool serves the
control path EQs of the SFs.

3. A pool of completion (data path) IRQs for the SF transport queues.
The size of this pool is:
num_irqs_allocated - pf_pool_size - sf_ctrl_pool_size.
This pool serves the netdev and RDMA stacks; the resulting split is
illustrated in the sizing sketch after the note below. Moreover, rmap is
not supported on SFs.

The sharing methodology of the SF pools is explained in the next patch.

Important note: rmap is not supported on SFs because rmap mapping cannot
function correctly for IRQs that are shared between different cores/netdev
RX rings.
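
For clarity, the sizing arithmetic performed by irq_pools_init() in this
patch can be sketched as the following standalone program. This is not
driver code: the example values of pf_vec and max_sfs are made up, and
DIV_ROUND_UP()/MIN() here stand in for the kernel helpers.

/* Hypothetical, self-contained sketch of the pool sizing added by this
 * patch. The constants mirror the ones defined in pci_irq.c/mlx5_irq.h.
 */
#include <stdio.h>

#define MLX5_COMP_EQS_PER_SF    8
#define MLX5_SFS_PER_CTRL_IRQ   64
#define MLX5_IRQ_CTRL_SF_MAX    8

#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))
#define MIN(a, b)               ((a) < (b) ? (a) : (b))

int main(void)
{
        int pf_vec = 16;        /* example: vectors kept for the PF pool */
        int max_sfs = 128;      /* example: max SFs the device may create */
        /* total vectors requested from PCI for the PF and all SFs */
        int total_vec = pf_vec + MLX5_IRQ_CTRL_SF_MAX +
                        MLX5_COMP_EQS_PER_SF * max_sfs;
        int sf_vec = total_vec - pf_vec;

        /* sf_ctrl pool: bounded by the SF vector share, by the number of
         * SFs served per control IRQ, and by a hard cap.
         */
        int num_sf_ctrl = MIN(DIV_ROUND_UP(sf_vec, MLX5_COMP_EQS_PER_SF),
                              DIV_ROUND_UP(max_sfs, MLX5_SFS_PER_CTRL_IRQ));
        num_sf_ctrl = MIN(num_sf_ctrl, MLX5_IRQ_CTRL_SF_MAX);

        /* whatever is left of the SF share becomes the completion pool */
        int num_sf_comp = sf_vec - num_sf_ctrl;

        printf("pf_pool=%d sf_ctrl_pool=%d sf_comp_pool=%d\n",
               pf_vec, num_sf_ctrl, num_sf_comp);
        return 0;
}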

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Shay Drory authored on 2021-02-23 11:48:17 +02:00, committed by Saeed Mahameed
parent fc63dd2a85
commit 71e084e264
3 changed files with 240 additions and 132 deletions


@@ -471,14 +471,7 @@ static int create_async_eq(struct mlx5_core_dev *dev,
        int err;
 
        mutex_lock(&eq_table->lock);
-       /* Async EQs must share irq index 0 */
-       if (param->irq_index != 0) {
-               err = -EINVAL;
-               goto unlock;
-       }
-
        err = create_map_eq(dev, eq, param);
-unlock:
        mutex_unlock(&eq_table->lock);
        return err;
 }
@@ -996,8 +989,11 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev)
        eq_table->num_comp_eqs =
                min_t(int,
-                     mlx5_irq_get_num_comp(eq_table->irq_table),
+                     mlx5_irq_table_get_num_comp(eq_table->irq_table),
                      num_eqs - MLX5_MAX_ASYNC_EQS);
+       if (mlx5_core_is_sf(dev))
+               eq_table->num_comp_eqs = min_t(int, eq_table->num_comp_eqs,
+                                              MLX5_COMP_EQS_PER_SF);
 
        err = create_async_eqs(dev);
        if (err) {


@@ -6,13 +6,17 @@
 
 #include <linux/mlx5/driver.h>
 
+#define MLX5_COMP_EQS_PER_SF 8
+
+#define MLX5_IRQ_EQ_CTRL (0)
+
 struct mlx5_irq;
 
 int mlx5_irq_table_init(struct mlx5_core_dev *dev);
 void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
 int mlx5_irq_table_create(struct mlx5_core_dev *dev);
 void mlx5_irq_table_destroy(struct mlx5_core_dev *dev);
-int mlx5_irq_get_num_comp(struct mlx5_irq_table *table);
+int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table);
 struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev);
 
 int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int devfn,


@@ -7,11 +7,19 @@
 #include <linux/mlx5/driver.h>
 #include "mlx5_core.h"
 #include "mlx5_irq.h"
+#include "sf/sf.h"
 #ifdef CONFIG_RFS_ACCEL
 #include <linux/cpu_rmap.h>
 #endif
 
 #define MLX5_MAX_IRQ_NAME (32)
+/* max irq_index is 255. three chars */
+#define MLX5_MAX_IRQ_IDX_CHARS (3)
+
+#define MLX5_SFS_PER_CTRL_IRQ 64
+#define MLX5_IRQ_CTRL_SF_MAX 8
+/* min num of vectores for SFs to be enabled */
+#define MLX5_IRQ_VEC_COMP_BASE_SF 2
 
 struct mlx5_irq {
        u32 index;
@@ -20,42 +28,22 @@ struct mlx5_irq {
        char name[MLX5_MAX_IRQ_NAME];
        struct kref kref;
        int irqn;
-       struct mlx5_irq_table *table;
+       struct mlx5_irq_pool *pool;
+};
+
+struct mlx5_irq_pool {
+       char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS];
+       struct xa_limit xa_num_irqs;
+       struct xarray irqs;
+       struct mlx5_core_dev *dev;
 };
 
 struct mlx5_irq_table {
-       struct xarray irqs;
-       int nvec;
+       struct mlx5_irq_pool *pf_pool;
+       struct mlx5_irq_pool *sf_ctrl_pool;
+       struct mlx5_irq_pool *sf_comp_pool;
 };
 
-int mlx5_irq_table_init(struct mlx5_core_dev *dev)
-{
-       struct mlx5_irq_table *irq_table;
-
-       if (mlx5_core_is_sf(dev))
-               return 0;
-
-       irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL);
-       if (!irq_table)
-               return -ENOMEM;
-
-       dev->priv.irq_table = irq_table;
-       return 0;
-}
-
-void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
-{
-       if (mlx5_core_is_sf(dev))
-               return;
-
-       kvfree(dev->priv.irq_table);
-}
-
-int mlx5_irq_get_num_comp(struct mlx5_irq_table *table)
-{
-       return table->nvec - MLX5_IRQ_VEC_COMP_BASE;
-}
-
 /**
  * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors
  * to be ssigned to each VF.
@@ -144,9 +132,9 @@ out:
 static void irq_release(struct kref *kref)
 {
        struct mlx5_irq *irq = container_of(kref, struct mlx5_irq, kref);
-       struct mlx5_irq_table *table = irq->table;
+       struct mlx5_irq_pool *pool = irq->pool;
 
-       xa_erase(&table->irqs, irq->index);
+       xa_erase(&pool->irqs, irq->index);
        /* free_irq requires that affinity and rmap will be cleared
         * before calling it. This is why there is asymmetry with set_rmap
         * which should be called after alloc_irq but before request_irq.
@@ -162,6 +150,76 @@ static void irq_put(struct mlx5_irq *irq)
        kref_put(&irq->kref, irq_release);
 }
 
+static irqreturn_t irq_int_handler(int irq, void *nh)
+{
+       atomic_notifier_call_chain(nh, 0, NULL);
+       return IRQ_HANDLED;
+}
+
+static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
+{
+       snprintf(name, MLX5_MAX_IRQ_NAME, "%s%d", pool->name, vecidx);
+}
+
+static void irq_set_name(char *name, int vecidx)
+{
+       if (vecidx == 0) {
+               snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
+               return;
+       }
+
+       snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d",
+                vecidx - MLX5_IRQ_VEC_COMP_BASE);
+}
+
+static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i)
+{
+       struct mlx5_core_dev *dev = pool->dev;
+       char name[MLX5_MAX_IRQ_NAME];
+       struct mlx5_irq *irq;
+       int err;
+
+       irq = kzalloc(sizeof(*irq), GFP_KERNEL);
+       if (!irq)
+               return ERR_PTR(-ENOMEM);
+       irq->irqn = pci_irq_vector(dev->pdev, i);
+       if (!pool->name[0])
+               irq_set_name(name, i);
+       else
+               irq_sf_set_name(pool, name, i);
+       ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
+       snprintf(irq->name, MLX5_MAX_IRQ_NAME,
+                "%s@pci:%s", name, pci_name(dev->pdev));
+       err = request_irq(irq->irqn, irq_int_handler, 0, irq->name,
+                         &irq->nh);
+       if (err) {
+               mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
+               goto err_req_irq;
+       }
+       if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
+               mlx5_core_warn(dev, "zalloc_cpumask_var failed\n");
+               err = -ENOMEM;
+               goto err_cpumask;
+       }
+       err = xa_alloc(&pool->irqs, &irq->index, irq, pool->xa_num_irqs,
+                      GFP_KERNEL);
+       if (err) {
+               mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
+                             irq->index, err);
+               goto err_xa;
+       }
+       irq->pool = pool;
+       kref_init(&irq->kref);
+       return irq;
+err_xa:
+       free_cpumask_var(irq->mask);
+err_cpumask:
+       free_irq(irq->irqn, &irq->nh);
+err_req_irq:
+       kfree(irq);
+       return ERR_PTR(err);
+}
+
 int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
 {
        int err;
@@ -184,69 +242,9 @@ int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
        return atomic_notifier_chain_unregister(&irq->nh, nb);
 }
 
-static irqreturn_t irq_int_handler(int irq, void *nh)
+struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
 {
-       atomic_notifier_call_chain(nh, 0, NULL);
-       return IRQ_HANDLED;
-}
-
-static void irq_set_name(char *name, int vecidx)
-{
-       if (!vecidx) {
-               snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async");
-               return;
-       }
-
-       snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d",
-                vecidx - MLX5_IRQ_VEC_COMP_BASE);
-}
-
-static struct mlx5_irq *irq_request(struct mlx5_core_dev *dev, int i)
-{
-       struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
-       char name[MLX5_MAX_IRQ_NAME];
-       struct xa_limit xa_num_irqs;
-       struct mlx5_irq *irq;
-       int err;
-
-       irq = kzalloc(sizeof(*irq), GFP_KERNEL);
-       if (!irq)
-               return ERR_PTR(-ENOMEM);
-       irq->irqn = pci_irq_vector(dev->pdev, i);
-       irq_set_name(name, i);
-       ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
-       snprintf(irq->name, MLX5_MAX_IRQ_NAME,
-                "%s@pci:%s", name, pci_name(dev->pdev));
-       err = request_irq(irq->irqn, irq_int_handler, 0, irq->name,
-                         &irq->nh);
-       if (err) {
-               mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
-               goto err_req_irq;
-       }
-       if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
-               mlx5_core_warn(dev, "zalloc_cpumask_var failed\n");
-               err = -ENOMEM;
-               goto err_cpumask;
-       }
-       xa_num_irqs.min = 0;
-       xa_num_irqs.max = table->nvec;
-       err = xa_alloc(&table->irqs, &irq->index, irq, xa_num_irqs,
-                      GFP_KERNEL);
-       if (err) {
-               mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
-                             irq->index, err);
-               goto err_xa;
-       }
-       irq->table = table;
-       kref_init(&irq->kref);
-       return irq;
-err_xa:
-       free_cpumask_var(irq->mask);
-err_cpumask:
-       free_irq(irq->irqn, &irq->nh);
-err_req_irq:
-       kfree(irq);
-       return ERR_PTR(err);
+       return irq->mask;
 }
 
 /**
@@ -272,14 +270,17 @@ struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, int vecidx,
                              struct cpumask *affinity)
 {
        struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
+       struct mlx5_irq_pool *pool;
        struct mlx5_irq *irq;
 
-       irq = xa_load(&irq_table->irqs, vecidx);
+       pool = irq_table->pf_pool;
+       irq = xa_load(&pool->irqs, vecidx);
        if (irq) {
                kref_get(&irq->kref);
                return irq;
        }
-       irq = irq_request(dev, vecidx);
+       irq = irq_request(pool, vecidx);
        if (IS_ERR(irq))
                return irq;
        cpumask_copy(irq->mask, affinity);
@@ -287,53 +288,162 @@ struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, int vecidx,
        return irq;
 }
 
-struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
-{
-       return irq->mask;
-}
+/* irq_pool API */
+
+static struct mlx5_irq_pool *
+irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name)
+{
+       struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
+
+       if (!pool)
+               return ERR_PTR(-ENOMEM);
+       pool->dev = dev;
+       xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC);
+       pool->xa_num_irqs.min = start;
+       pool->xa_num_irqs.max = start + size - 1;
+       if (name)
+               snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS,
+                        name);
+       mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
+                     name, size, start);
+       return pool;
+}
+
+static void irq_pool_free(struct mlx5_irq_pool *pool)
+{
+       struct mlx5_irq *irq;
+       unsigned long index;
+
+       xa_for_each(&pool->irqs, index, irq)
+               irq_release(&irq->kref);
+       xa_destroy(&pool->irqs);
+       kvfree(pool);
+}
+
+static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
+{
+       struct mlx5_irq_table *table = dev->priv.irq_table;
+       int num_sf_ctrl_by_msix;
+       int num_sf_ctrl_by_sfs;
+       int num_sf_ctrl;
+       int err;
+
+       /* init pf_pool */
+       table->pf_pool = irq_pool_alloc(dev, 0, pf_vec, NULL);
+       if (IS_ERR(table->pf_pool))
+               return PTR_ERR(table->pf_pool);
+       if (!mlx5_sf_max_functions(dev))
+               return 0;
+       if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
+               mlx5_core_err(dev, "Not enught IRQs for SFs. SF may run at lower performance\n");
+               return 0;
+       }
+
+       /* init sf_ctrl_pool */
+       num_sf_ctrl_by_msix = DIV_ROUND_UP(sf_vec, MLX5_COMP_EQS_PER_SF);
+       num_sf_ctrl_by_sfs = DIV_ROUND_UP(mlx5_sf_max_functions(dev),
+                                         MLX5_SFS_PER_CTRL_IRQ);
+       num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs);
+       num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
+       table->sf_ctrl_pool = irq_pool_alloc(dev, pf_vec, num_sf_ctrl,
+                                            "mlx5_sf_ctrl");
+       if (IS_ERR(table->sf_ctrl_pool)) {
+               err = PTR_ERR(table->sf_ctrl_pool);
+               goto err_pf;
+       }
+       /* init sf_comp_pool */
+       table->sf_comp_pool = irq_pool_alloc(dev, pf_vec + num_sf_ctrl,
+                                            sf_vec - num_sf_ctrl, "mlx5_sf_comp");
+       if (IS_ERR(table->sf_comp_pool)) {
+               err = PTR_ERR(table->sf_comp_pool);
+               goto err_sf_ctrl;
+       }
+       return 0;
+err_sf_ctrl:
+       irq_pool_free(table->sf_ctrl_pool);
+err_pf:
+       irq_pool_free(table->pf_pool);
+       return err;
+}
+
+static void irq_pools_destroy(struct mlx5_irq_table *table)
+{
+       if (table->sf_ctrl_pool) {
+               irq_pool_free(table->sf_comp_pool);
+               irq_pool_free(table->sf_ctrl_pool);
+       }
+       irq_pool_free(table->pf_pool);
+}
+
+/* irq_table API */
+
+int mlx5_irq_table_init(struct mlx5_core_dev *dev)
+{
+       struct mlx5_irq_table *irq_table;
+
+       if (mlx5_core_is_sf(dev))
+               return 0;
+
+       irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL);
+       if (!irq_table)
+               return -ENOMEM;
+
+       dev->priv.irq_table = irq_table;
+       return 0;
+}
+
+void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
+{
+       if (mlx5_core_is_sf(dev))
+               return;
+
+       kvfree(dev->priv.irq_table);
+}
+
+int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
+{
+       return table->pf_pool->xa_num_irqs.max - table->pf_pool->xa_num_irqs.min;
+}
 
 int mlx5_irq_table_create(struct mlx5_core_dev *dev)
 {
-       struct mlx5_priv *priv = &dev->priv;
-       struct mlx5_irq_table *table = priv->irq_table;
        int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
                      MLX5_CAP_GEN(dev, max_num_eqs) :
                      1 << MLX5_CAP_GEN(dev, log_max_eq);
-       int nvec;
+       int total_vec;
+       int pf_vec;
        int err;
 
        if (mlx5_core_is_sf(dev))
                return 0;
 
-       nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
-              MLX5_IRQ_VEC_COMP_BASE;
-       nvec = min_t(int, nvec, num_eqs);
-       if (nvec <= MLX5_IRQ_VEC_COMP_BASE)
+       pf_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
+                MLX5_IRQ_VEC_COMP_BASE;
+       pf_vec = min_t(int, pf_vec, num_eqs);
+       if (pf_vec <= MLX5_IRQ_VEC_COMP_BASE)
                return -ENOMEM;
 
-       xa_init_flags(&table->irqs, XA_FLAGS_ALLOC);
-
-       nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_IRQ_VEC_COMP_BASE + 1,
-                                    nvec, PCI_IRQ_MSIX);
-       if (nvec < 0) {
-               err = nvec;
-               goto err_free_irq;
-       }
-
-       table->nvec = nvec;
-
-       return 0;
-
-err_free_irq:
-       xa_destroy(&table->irqs);
+       total_vec = pf_vec;
+       if (mlx5_sf_max_functions(dev))
+               total_vec += MLX5_IRQ_CTRL_SF_MAX +
+                       MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev);
+
+       total_vec = pci_alloc_irq_vectors(dev->pdev, MLX5_IRQ_VEC_COMP_BASE + 1,
+                                         total_vec, PCI_IRQ_MSIX);
+       if (total_vec < 0)
+               return total_vec;
+       pf_vec = min(pf_vec, total_vec);
+
+       err = irq_pools_init(dev, total_vec - pf_vec, pf_vec);
+       if (err)
+               pci_free_irq_vectors(dev->pdev);
+
        return err;
 }
 
 void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
 {
        struct mlx5_irq_table *table = dev->priv.irq_table;
-       struct mlx5_irq *irq;
-       unsigned long index;
 
        if (mlx5_core_is_sf(dev))
                return;
@@ -341,10 +451,8 @@ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
        /* There are cases where IRQs still will be in used when we reaching
         * to here. Hence, making sure all the irqs are realeased.
         */
-       xa_for_each(&table->irqs, index, irq)
-               irq_release(&irq->kref);
+       irq_pools_destroy(table);
        pci_free_irq_vectors(dev->pdev);
-       xa_destroy(&table->irqs);
 }
 
 struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)