RDMA/mlx5: Add steering support in optional flow counters

Add steering infrastructure for adding and removing optional counters.
This allows the counters to be added and removed dynamically so that
they do not hurt performance.

Link: https://lore.kernel.org/r/20211008122439.166063-12-markzhang@nvidia.com
Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
This commit is contained in:
Aharon Landau 2021-10-08 15:24:37 +03:00 committed by Jason Gunthorpe
parent 886773d249
commit ffa501ef19
3 changed files with 212 additions and 0 deletions

View File

@ -10,12 +10,14 @@
#include <rdma/uverbs_std_types.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
#include <rdma/ib_hdrs.h>
#include <rdma/ib_umem.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/fs_helpers.h>
#include <linux/mlx5/accel.h>
#include <linux/mlx5/eswitch.h>
#include <net/inet_ecn.h>
#include "mlx5_ib.h"
#include "counters.h"
#include "devx.h"
@ -847,6 +849,191 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
return prio;
}
/*
 * Flow table priorities used for the optional counters that live in the
 * RDMA RX counters namespace (ECN CE-marked packets and received CNPs).
 */
enum {
	RDMA_RX_ECN_OPCOUNTER_PRIO = 0,
	RDMA_RX_CNP_OPCOUNTER_PRIO = 1,
};
/*
 * Flow table priority used for the optional counter that lives in the
 * RDMA TX counters namespace (transmitted CNPs).
 */
enum {
	RDMA_TX_CNP_OPCOUNTER_PRIO = 0,
};
/*
 * Narrow @spec so that it only matches traffic whose
 * misc_parameters.source_vhca_port equals @port_num.
 *
 * Only the match criteria and match value are written here; the caller is
 * responsible for computing spec->match_criteria_enable afterwards.
 *
 * Returns 0 on success, or -EOPNOTSUPP unless both the RDMA RX and the
 * RDMA TX flow tables report support for the source_vhca_port field.
 */
static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num,
			      struct mlx5_flow_spec *spec)
{
	void *criteria = spec->match_criteria;
	void *value = spec->match_value;

	if (!(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
					 ft_field_support.source_vhca_port) &&
	      MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
					 ft_field_support.source_vhca_port)))
		return -EOPNOTSUPP;

	MLX5_SET_TO_ONES(fte_match_param, criteria,
			 misc_parameters.source_vhca_port);
	MLX5_SET(fte_match_param, value,
		 misc_parameters.source_vhca_port, port_num);

	return 0;
}
/*
 * Fill @spec so that it matches packets of IP version @ipv whose outer
 * ECN field is INET_ECN_CE.
 *
 * When multiport is enabled on the core device the match is additionally
 * restricted to @port_num through set_vhca_port_spec().
 *
 * Returns 0 on success, or -EOPNOTSUPP when the RDMA RX flow table cannot
 * match on outer_ip_version or the port restriction cannot be applied.
 */
static int set_ecn_ce_spec(struct mlx5_ib_dev *dev, u32 port_num,
			   struct mlx5_flow_spec *spec, int ipv)
{
	if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
					ft_field_support.outer_ip_version))
		return -EOPNOTSUPP;

	if (mlx5_core_mp_enabled(dev->mdev)) {
		if (set_vhca_port_spec(dev, port_num, spec))
			return -EOPNOTSUPP;
	}

	/* Which header fields take part in the match. */
	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
			 outer_headers.ip_version);
	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
			 outer_headers.ip_ecn);

	/* The values those fields must carry. */
	MLX5_SET(fte_match_param, spec->match_value,
		 outer_headers.ip_version, ipv);
	MLX5_SET(fte_match_param, spec->match_value,
		 outer_headers.ip_ecn, INET_ECN_CE);

	/* Derive the enable bits only once every criterion is in place. */
	spec->match_criteria_enable =
		get_match_criteria_enable(spec->match_criteria);

	return 0;
}
/*
 * Fill @spec so that it matches packets whose BTH opcode is
 * IB_BTH_OPCODE_CNP.
 *
 * When multiport is enabled on the core device the match is additionally
 * restricted to @port_num through set_vhca_port_spec().
 *
 * Returns 0 on success, or -EOPNOTSUPP when the port restriction cannot be
 * applied.
 */
static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num,
			struct mlx5_flow_spec *spec)
{
	if (mlx5_core_mp_enabled(dev->mdev)) {
		if (set_vhca_port_spec(dev, port_num, spec))
			return -EOPNOTSUPP;
	}

	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
			 misc_parameters.bth_opcode);
	MLX5_SET(fte_match_param, spec->match_value,
		 misc_parameters.bth_opcode, IB_BTH_OPCODE_CNP);

	/* Derive the enable bits only once every criterion is in place. */
	spec->match_criteria_enable =
		get_match_criteria_enable(spec->match_criteria);

	return 0;
}
/*
 * mlx5_ib_fs_add_op_fc - install the steering rules feeding an optional counter
 * @dev:      mlx5 IB device
 * @port_num: port the rules are restricted to when multiport is enabled
 * @opfc:     optional counter; opfc->fc is the flow counter the rules count
 *            into, opfc->rule[] receives the installed rule handles
 * @type:     which optional counter to enable
 *
 * Builds one match spec per rule (two for CE packets: IPv4 and IPv6; one for
 * the CNP counters), creates the per-type flow table on first use and adds
 * the rules with a COUNT | ALLOW action targeting opfc->fc.
 *
 * On success the flow table refcount is raised by the number of rules added.
 * On failure no rule is left installed and every opfc->rule[] slot touched by
 * this call is reset to NULL, so that mlx5_ib_fs_remove_op_fc(), which walks
 * rule[] up to the first NULL entry, never sees an ERR_PTR or a stale handle.
 *
 * NOTE(review): dev->flow_db->lock is not taken here; confirm that callers
 * serialize access to dev->flow_db->opfcs[].
 *
 * Return: 0 on success, -ENOMEM if the specs cannot be allocated,
 * -EOPNOTSUPP if @type is unknown or the device lacks a required capability
 * or namespace, or the error reported by _get_prio() / mlx5_add_flow_rules().
 */
int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
			 struct mlx5_ib_op_fc *opfc,
			 enum mlx5_ib_optional_counter_type type)
{
	enum mlx5_flow_namespace_type fn_type;
	int priority, i, err, spec_num;
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_destination dst = {};
	struct mlx5_flow_namespace *ns;
	struct mlx5_ib_flow_prio *prio;
	struct mlx5_flow_spec *spec;

	spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	switch (type) {
	case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
		/* One rule per IP version. */
		if (set_ecn_ce_spec(dev, port_num, &spec[0],
				    MLX5_FS_IPV4_VERSION) ||
		    set_ecn_ce_spec(dev, port_num, &spec[1],
				    MLX5_FS_IPV6_VERSION)) {
			err = -EOPNOTSUPP;
			goto free;
		}
		spec_num = 2;
		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
		priority = RDMA_RX_ECN_OPCOUNTER_PRIO;
		break;

	case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
					ft_field_support_2_nic_receive_rdma.bth_opcode) ||
		    set_cnp_spec(dev, port_num, &spec[0])) {
			err = -EOPNOTSUPP;
			goto free;
		}
		spec_num = 1;
		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
		priority = RDMA_RX_CNP_OPCOUNTER_PRIO;
		break;

	case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
					ft_field_support_2_nic_transmit_rdma.bth_opcode) ||
		    set_cnp_spec(dev, port_num, &spec[0])) {
			err = -EOPNOTSUPP;
			goto free;
		}
		spec_num = 1;
		fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
		priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
		break;

	default:
		err = -EOPNOTSUPP;
		goto free;
	}

	ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
	if (!ns) {
		err = -EOPNOTSUPP;
		goto free;
	}

	/* The flow table is shared per counter type; create it on first use. */
	prio = &dev->flow_db->opfcs[type];
	if (!prio->flow_table) {
		prio = _get_prio(ns, prio, priority,
				 dev->num_ports * MAX_OPFC_RULES, 1, 0);
		if (IS_ERR(prio)) {
			err = PTR_ERR(prio);
			goto free;
		}
	}

	dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
	dst.counter_id = mlx5_fc_id(opfc->fc);

	flow_act.action =
		MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;

	for (i = 0; i < spec_num; i++) {
		opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i],
						    &flow_act, &dst, 1);
		if (IS_ERR(opfc->rule[i])) {
			err = PTR_ERR(opfc->rule[i]);
			goto del_rules;
		}
	}
	prio->refcount += spec_num;
	kfree(spec);

	return 0;

del_rules:
	/* Slot i holds an ERR_PTR, not a rule: do not leave it behind. */
	opfc->rule[i] = NULL;
	for (i -= 1; i >= 0; i--) {
		mlx5_del_flow_rules(opfc->rule[i]);
		/* Drop the now-dangling handle. */
		opfc->rule[i] = NULL;
	}
	/* refcount was not raised for these rules, hence ft_added == false. */
	put_flow_table(dev, prio, false);
free:
	kfree(spec);
	return err;
}
void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
struct mlx5_ib_op_fc *opfc,
enum mlx5_ib_optional_counter_type type)
{
int i;
for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) {
mlx5_del_flow_rules(opfc->rule[i]);
put_flow_table(dev, &dev->flow_db->opfcs[type], true);
}
}
static void set_underlay_qp(struct mlx5_ib_dev *dev,
struct mlx5_flow_spec *spec,
u32 underlay_qpn)

View File

@ -263,6 +263,14 @@ struct mlx5_ib_pp {
struct mlx5_core_dev *mdev;
};
/*
 * Optional counters that can be enabled at run time. The value is used as
 * an index into mlx5_ib_flow_db::opfcs[].
 */
enum mlx5_ib_optional_counter_type {
	/* Received packets whose ECN field is CE (IPv4 and IPv6). */
	MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS = 0,
	/* Received packets whose BTH opcode is CNP. */
	MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS,
	/* Transmitted packets whose BTH opcode is CNP. */
	MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS,

	/* Number of counter types; must stay last. */
	MLX5_IB_OPCOUNTER_MAX,
};
struct mlx5_ib_flow_db {
struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT];
struct mlx5_ib_flow_prio egress_prios[MLX5_IB_NUM_FLOW_FT];
@ -271,6 +279,7 @@ struct mlx5_ib_flow_db {
struct mlx5_ib_flow_prio fdb;
struct mlx5_ib_flow_prio rdma_rx[MLX5_IB_NUM_FLOW_FT];
struct mlx5_ib_flow_prio rdma_tx[MLX5_IB_NUM_FLOW_FT];
struct mlx5_ib_flow_prio opfcs[MLX5_IB_OPCOUNTER_MAX];
struct mlx5_flow_table *lag_demux_ft;
/* Protect flow steering bypass flow tables
* when add/del flow rules.
@ -797,6 +806,13 @@ struct mlx5_ib_resources {
struct mlx5_ib_port_resources ports[2];
};
/* Upper bound on the steering rules a single optional counter installs. */
#define MAX_OPFC_RULES	2

/*
 * An optional flow counter together with the steering rules that feed it.
 * Unused trailing rule[] slots are NULL; removal stops at the first NULL.
 */
struct mlx5_ib_op_fc {
	struct mlx5_fc		*fc;			/* counter the rules count into */
	struct mlx5_flow_handle	*rule[MAX_OPFC_RULES];	/* installed rule handles */
};
struct mlx5_ib_counters {
struct rdma_stat_desc *descs;
size_t *offsets;
@ -807,6 +823,14 @@ struct mlx5_ib_counters {
u16 set_id;
};
/*
 * Install the steering rules that make opfc->fc count packets of the given
 * optional counter type; the rule handles are stored in opfc->rule[].
 * Returns 0 on success or a negative errno (-ENOMEM, -EOPNOTSUPP, or the
 * error from flow table / rule creation).
 */
int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
struct mlx5_ib_op_fc *opfc,
enum mlx5_ib_optional_counter_type type);
/*
 * Delete the rules stored in opfc->rule[] (up to the first NULL slot) and
 * drop one reference on the per-type flow table for each deleted rule.
 */
void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
struct mlx5_ib_op_fc *opfc,
enum mlx5_ib_optional_counter_type type);
struct mlx5_ib_multiport_info;
struct mlx5_ib_multiport {

View File

@ -232,6 +232,7 @@ static inline u32 ib_get_sqpn(struct ib_other_headers *ohdr)
#define IB_BTH_SE_SHIFT 23
#define IB_BTH_TVER_MASK 0xf
#define IB_BTH_TVER_SHIFT 16
/*
 * Full BTH opcode value of a CNP.
 * NOTE(review): presumably the RoCEv2 Congestion Notification Packet opcode;
 * confirm against the specification.
 */
#define IB_BTH_OPCODE_CNP 0x81
static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr)
{