Files
linux/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c

754 lines
19 KiB
C
Raw Normal View History

// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
/* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
#include <linux/netdevice.h>
#include <linux/string.h>
#include <linux/bitops.h>
#include <net/dcbnl.h>
#include "spectrum.h"
#include "reg.h"
/* Report the supported DCBx capability: host-managed operation with the
 * IEEE 802.1Qaz version only.
 */
static u8 mlxsw_sp_dcbnl_getdcbx(struct net_device __always_unused *dev)
{
	return DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE;
}
/* Accept only the exact capability combination we advertise in getdcbx:
 * host-managed DCBx in IEEE mode.  Non-zero return rejects the request.
 */
static u8 mlxsw_sp_dcbnl_setdcbx(struct net_device __always_unused *dev,
				 u8 mode)
{
	if (mode == (DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE))
		return 0;
	return 1;
}
static int mlxsw_sp_dcbnl_ieee_getets(struct net_device *dev,
struct ieee_ets *ets)
{
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
memcpy(ets, mlxsw_sp_port->dcb.ets, sizeof(*ets));
return 0;
}
/* Validate a requested ETS configuration: every TC must use either strict
 * priority or ETS scheduling, each priority must map to a valid TC, and if
 * any TC uses ETS, the ETS bandwidth shares must sum to exactly 100.
 */
static int mlxsw_sp_port_ets_validate(struct mlxsw_sp_port *mlxsw_sp_port,
				      struct ieee_ets *ets)
{
	struct net_device *dev = mlxsw_sp_port->dev;
	bool ets_seen = false;
	int total_bw = 0;
	int tc;

	for (tc = 0; tc < IEEE_8021QAZ_MAX_TCS; tc++) {
		if (ets->tc_tsa[tc] == IEEE_8021QAZ_TSA_ETS) {
			ets_seen = true;
			total_bw += ets->tc_tx_bw[tc];
		} else if (ets->tc_tsa[tc] != IEEE_8021QAZ_TSA_STRICT) {
			netdev_err(dev, "Only strict priority and ETS are supported\n");
			return -EINVAL;
		}

		if (ets->prio_tc[tc] >= IEEE_8021QAZ_MAX_TCS) {
			netdev_err(dev, "Invalid TC\n");
			return -EINVAL;
		}
	}

	if (ets_seen && total_bw != 100) {
		netdev_err(dev, "Total ETS bandwidth should equal 100\n");
		return -EINVAL;
	}

	return 0;
}
static int mlxsw_sp_port_headroom_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
struct ieee_ets *ets)
{
struct net_device *dev = mlxsw_sp_port->dev;
mlxsw: spectrum: Track priorities in struct mlxsw_sp_hdroom The mapping from priorities to buffers determines which buffers should be configured. Lossiness of these priorities combined with the mapping determines whether a given buffer should be lossy. Currently this configuration is stored implicitly in DCB ETS, PFC and ethtool PAUSE configuration. Keeping it together with the rest of the headroom configuration and deriving it as needed from PFC / ETS / PAUSE will make things clearer. To that end, add a field "prios" to struct mlxsw_sp_hdroom. Previously, __mlxsw_sp_port_headroom_set() took prio_tc as an argument, and assumed that the same mapping as we use on the egress should be used on ingress as well. Instead, track this configuration at each priority, so that it can be adjusted flexibly. In the following patches, as dcbnl_setbuffer is implemented, it will need to store its own mapping, and it will also be sometimes necessary to revert back to the original ETS mapping. Therefore track two buffer indices: the one for chip configuration (buf_idx), and the source one (ets_buf_idx). Introduce a function to configure the chip-level buffer index, and for now have it simply copy the ETS mapping over to the chip mapping. Update the ETS handler to project prio_tc to the ets_buf_idx and invoke the buf_idx recomputation. Now that there is a canonical place to look for this configuration, mlxsw_sp_port_headroom_set() does not need to invent def_prio_tc to use if DCB is compiled out. Signed-off-by: Petr Machata <petrm@nvidia.com> Reviewed-by: Jiri Pirko <jiri@nvidia.com> Signed-off-by: Ido Schimmel <idosch@nvidia.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-09-16 09:35:17 +03:00
struct mlxsw_sp_hdroom hdroom;
int prio;
int err;
mlxsw: spectrum: Split headroom autoresize out of buffer configuration Split mlxsw_sp_port_headroom_set() to three functions. mlxsw_sp_hdroom_bufs_reset_sizes() changes the sizes of the individual PG buffers, and mlxsw_sp_hdroom_configure_buffers() will actually apply the configuration. A third function, mlxsw_sp_hdroom_bufs_fit(), verifies that the requested buffer configuration matches total headroom size requirements. Add wrappers, mlxsw_sp_hdroom_configure() and __..., that will eventually perform full headroom configuration, but for now, only have them verify the configured headroom size, and invoke mlxsw_sp_hdroom_configure_buffers(). Have them take the `force` argument to prepare for a later patch, even though it is currently unused. Note that the loop in mlxsw_sp_hdroom_configure_buffers() only goes through DCBX_MAX_BUFFERS. Since there is no logic to configure the control buffer, it needs to keep the values queried from the FW. Eventually this function should configure all the PGs. Note that conversion of __mlxsw_sp_dcbnl_ieee_setets() is not trivial. That function performs the headroom configuration in three steps: first it resizes the buffers and adds any new ones. Then it redirects priorities to the new buffers. And finally it sets the size of the now-unused buffers to zero. This way no packet drops are introduced. So after invoking mlxsw_sp_hdroom_bufs_reset_sizes(), tweak the configuration to keep the old sizes of PG buffers for those buffers whose size was set to zero. Signed-off-by: Petr Machata <petrm@nvidia.com> Reviewed-by: Jiri Pirko <jiri@nvidia.com> Signed-off-by: Ido Schimmel <idosch@nvidia.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-09-16 09:35:20 +03:00
hdroom = *mlxsw_sp_port->hdroom;
mlxsw: spectrum: Track priorities in struct mlxsw_sp_hdroom The mapping from priorities to buffers determines which buffers should be configured. Lossiness of these priorities combined with the mapping determines whether a given buffer should be lossy. Currently this configuration is stored implicitly in DCB ETS, PFC and ethtool PAUSE configuration. Keeping it together with the rest of the headroom configuration and deriving it as needed from PFC / ETS / PAUSE will make things clearer. To that end, add a field "prios" to struct mlxsw_sp_hdroom. Previously, __mlxsw_sp_port_headroom_set() took prio_tc as an argument, and assumed that the same mapping as we use on the egress should be used on ingress as well. Instead, track this configuration at each priority, so that it can be adjusted flexibly. In the following patches, as dcbnl_setbuffer is implemented, it will need to store its own mapping, and it will also be sometimes necessary to revert back to the original ETS mapping. Therefore track two buffer indices: the one for chip configuration (buf_idx), and the source one (ets_buf_idx). Introduce a function to configure the chip-level buffer index, and for now have it simply copy the ETS mapping over to the chip mapping. Update the ETS handler to project prio_tc to the ets_buf_idx and invoke the buf_idx recomputation. Now that there is a canonical place to look for this configuration, mlxsw_sp_port_headroom_set() does not need to invent def_prio_tc to use if DCB is compiled out. Signed-off-by: Petr Machata <petrm@nvidia.com> Reviewed-by: Jiri Pirko <jiri@nvidia.com> Signed-off-by: Ido Schimmel <idosch@nvidia.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-09-16 09:35:17 +03:00
for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++)
hdroom.prios.prio[prio].ets_buf_idx = ets->prio_tc[prio];
mlxsw_sp_hdroom_prios_reset_buf_idx(&hdroom);
mlxsw_sp_hdroom_bufs_reset_lossiness(&hdroom);
mlxsw: spectrum: Split headroom autoresize out of buffer configuration Split mlxsw_sp_port_headroom_set() to three functions. mlxsw_sp_hdroom_bufs_reset_sizes() changes the sizes of the individual PG buffers, and mlxsw_sp_hdroom_configure_buffers() will actually apply the configuration. A third function, mlxsw_sp_hdroom_bufs_fit(), verifies that the requested buffer configuration matches total headroom size requirements. Add wrappers, mlxsw_sp_hdroom_configure() and __..., that will eventually perform full headroom configuration, but for now, only have them verify the configured headroom size, and invoke mlxsw_sp_hdroom_configure_buffers(). Have them take the `force` argument to prepare for a later patch, even though it is currently unused. Note that the loop in mlxsw_sp_hdroom_configure_buffers() only goes through DCBX_MAX_BUFFERS. Since there is no logic to configure the control buffer, it needs to keep the values queried from the FW. Eventually this function should configure all the PGs. Note that conversion of __mlxsw_sp_dcbnl_ieee_setets() is not trivial. That function performs the headroom configuration in three steps: first it resizes the buffers and adds any new ones. Then it redirects priorities to the new buffers. And finally it sets the size of the now-unused buffers to zero. This way no packet drops are introduced. So after invoking mlxsw_sp_hdroom_bufs_reset_sizes(), tweak the configuration to keep the old sizes of PG buffers for those buffers whose size was set to zero. Signed-off-by: Petr Machata <petrm@nvidia.com> Reviewed-by: Jiri Pirko <jiri@nvidia.com> Signed-off-by: Ido Schimmel <idosch@nvidia.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-09-16 09:35:20 +03:00
mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom);
mlxsw: spectrum: Track priorities in struct mlxsw_sp_hdroom The mapping from priorities to buffers determines which buffers should be configured. Lossiness of these priorities combined with the mapping determines whether a given buffer should be lossy. Currently this configuration is stored implicitly in DCB ETS, PFC and ethtool PAUSE configuration. Keeping it together with the rest of the headroom configuration and deriving it as needed from PFC / ETS / PAUSE will make things clearer. To that end, add a field "prios" to struct mlxsw_sp_hdroom. Previously, __mlxsw_sp_port_headroom_set() took prio_tc as an argument, and assumed that the same mapping as we use on the egress should be used on ingress as well. Instead, track this configuration at each priority, so that it can be adjusted flexibly. In the following patches, as dcbnl_setbuffer is implemented, it will need to store its own mapping, and it will also be sometimes necessary to revert back to the original ETS mapping. Therefore track two buffer indices: the one for chip configuration (buf_idx), and the source one (ets_buf_idx). Introduce a function to configure the chip-level buffer index, and for now have it simply copy the ETS mapping over to the chip mapping. Update the ETS handler to project prio_tc to the ets_buf_idx and invoke the buf_idx recomputation. Now that there is a canonical place to look for this configuration, mlxsw_sp_port_headroom_set() does not need to invent def_prio_tc to use if DCB is compiled out. Signed-off-by: Petr Machata <petrm@nvidia.com> Reviewed-by: Jiri Pirko <jiri@nvidia.com> Signed-off-by: Ido Schimmel <idosch@nvidia.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-09-16 09:35:17 +03:00
err = mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom);
if (err) {
netdev_err(dev, "Failed to configure port's headroom\n");
return err;
}
return 0;
}
/* Program a validated ETS configuration: first the egress scheduler
 * elements and the prio->TC mapping, then the ingress headroom.  On
 * failure, roll back to the previously cached configuration (*my_ets).
 *
 * Fix: the rollback loop under err_port_ets_set used to assign the
 * rollback call's return value to `err`, so a successful rollback made
 * this function return 0 even though the requested configuration was NOT
 * applied -- and the caller would then cache the new ETS state.  The
 * rollback is best-effort; the original error code must be preserved.
 */
static int __mlxsw_sp_dcbnl_ieee_setets(struct mlxsw_sp_port *mlxsw_sp_port,
					struct ieee_ets *ets)
{
	struct ieee_ets *my_ets = mlxsw_sp_port->dcb.ets;
	struct net_device *dev = mlxsw_sp_port->dev;
	int i, err;

	/* Egress configuration. */
	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
		bool dwrr = ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS;
		u8 weight = ets->tc_tx_bw[i];

		err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
					    MLXSW_REG_QEEC_HR_SUBGROUP, i,
					    0, dwrr, weight);
		if (err) {
			netdev_err(dev, "Failed to link subgroup ETS element %d to group\n",
				   i);
			goto err_port_ets_set;
		}
	}

	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
		err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i,
						ets->prio_tc[i]);
		if (err) {
			netdev_err(dev, "Failed to map prio %d to TC %d\n", i,
				   ets->prio_tc[i]);
			goto err_port_prio_tc_set;
		}
	}

	/* Ingress configuration. */
	err = mlxsw_sp_port_headroom_ets_set(mlxsw_sp_port, ets);
	if (err)
		goto err_port_headroom_set;

	return 0;

err_port_headroom_set:
	i = IEEE_8021QAZ_MAX_TCS;
err_port_prio_tc_set:
	for (i--; i >= 0; i--)
		mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, my_ets->prio_tc[i]);
	i = IEEE_8021QAZ_MAX_TCS;
err_port_ets_set:
	for (i--; i >= 0; i--) {
		bool dwrr = my_ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS;
		u8 weight = my_ets->tc_tx_bw[i];

		/* Best-effort rollback; do not clobber the original error. */
		mlxsw_sp_port_ets_set(mlxsw_sp_port,
				      MLXSW_REG_QEEC_HR_SUBGROUP, i,
				      0, dwrr, weight);
	}
	return err;
}
static int mlxsw_sp_dcbnl_ieee_setets(struct net_device *dev,
struct ieee_ets *ets)
{
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
int err;
err = mlxsw_sp_port_ets_validate(mlxsw_sp_port, ets);
if (err)
return err;
err = __mlxsw_sp_dcbnl_ieee_setets(mlxsw_sp_port, ets);
if (err)
return err;
memcpy(mlxsw_sp_port->dcb.ets, ets, sizeof(*ets));
mlxsw_sp_port->dcb.ets->ets_cap = IEEE_8021QAZ_MAX_TCS;
return 0;
}
/* Validate a dcbnl APP entry.  Supported selectors are DSCP (protocol is
 * a DSCP value, < 64) and EtherType with protocol 0 (default priority).
 * Priorities must fit in the TC range.
 */
static int mlxsw_sp_dcbnl_app_validate(struct net_device *dev,
				       struct dcb_app *app)
{
	int active;

	if (app->priority >= IEEE_8021QAZ_MAX_TCS) {
		netdev_err(dev, "APP entry with priority value %u is invalid\n",
			   app->priority);
		return -EINVAL;
	}

	switch (app->selector) {
	case IEEE_8021QAZ_APP_SEL_DSCP:
		if (app->protocol >= 64) {
			netdev_err(dev, "DSCP APP entry with protocol value %u is invalid\n",
				   app->protocol);
			return -EINVAL;
		}

		/* Warn about any DSCP APP entries with the same PID. */
		active = fls(dcb_ieee_getapp_mask(dev, app)) - 1;
		if (active >= 0) {
			if (active < app->priority)
				netdev_warn(dev, "Choosing priority %d for DSCP %d in favor of previously-active value of %d\n",
					    app->priority, app->protocol, active);
			else if (active > app->priority)
				netdev_warn(dev, "Ignoring new priority %d for DSCP %d in favor of current value of %d\n",
					    app->priority, app->protocol, active);
		}
		break;
	case IEEE_8021QAZ_APP_SEL_ETHERTYPE:
		if (app->protocol) {
			netdev_err(dev, "EtherType APP entries with protocol value != 0 not supported\n");
			return -EINVAL;
		}
		break;
	default:
		netdev_err(dev, "APP entries with selector %u not supported\n",
			   app->selector);
		return -EINVAL;
	}

	return 0;
}
/* Determine the port's default priority from the APP table.  When several
 * default-priority entries are configured, the highest one wins; with
 * none, fall back to priority 0.
 */
static u8
mlxsw_sp_port_dcb_app_default_prio(struct mlxsw_sp_port *mlxsw_sp_port)
{
	u8 mask;

	mask = dcb_ieee_getapp_default_prio_mask(mlxsw_sp_port->dev);
	return mask ? fls(mask) - 1 : 0;
}
/* Build the DSCP->priority map.  Each entry in *map starts out as a mask
 * of configured priorities; collapse it to a single priority -- the
 * highest configured one, or @default_prio where no entry exists.
 */
static void
mlxsw_sp_port_dcb_app_dscp_prio_map(struct mlxsw_sp_port *mlxsw_sp_port,
				    u8 default_prio,
				    struct dcb_ieee_app_dscp_map *map)
{
	int dscp;

	dcb_ieee_getapp_dscp_prio_mask_map(mlxsw_sp_port->dev, map);
	for (dscp = 0; dscp < ARRAY_SIZE(map->map); dscp++) {
		u8 mask = map->map[dscp];

		map->map[dscp] = mask ? fls(mask) - 1 : default_prio;
	}
}
/* Build the priority->DSCP map.  Each entry in *map starts out as a mask
 * of configured DSCP values; collapse it to the highest configured DSCP.
 * Returns true iff any DSCP APP entry exists at all.
 */
static bool
mlxsw_sp_port_dcb_app_prio_dscp_map(struct mlxsw_sp_port *mlxsw_sp_port,
				    struct dcb_ieee_app_prio_map *map)
{
	bool have_dscp = false;
	int prio;

	dcb_ieee_getapp_prio_dscp_mask_map(mlxsw_sp_port->dev, map);
	for (prio = 0; prio < ARRAY_SIZE(map->map); prio++) {
		if (!map->map[prio])
			continue;
		map->map[prio] = fls64(map->map[prio]) - 1;
		have_dscp = true;
	}

	return have_dscp;
}
/* Set the port's packet trust state via the QPTS register, i.e. whether
 * packet priority is derived from PCP (trust L2) or DSCP (trust L3).
 */
static int
mlxsw_sp_port_dcb_app_update_qpts(struct mlxsw_sp_port *mlxsw_sp_port,
				  enum mlxsw_reg_qpts_trust_state ts)
{
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
	char qpts_pl[MLXSW_REG_QPTS_LEN];

	mlxsw_reg_qpts_pack(qpts_pl, mlxsw_sp_port->local_port, ts);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpts), qpts_pl);
}
/* Enable or disable DSCP rewrite on egress via the QRWE register.
 * NOTE(review): the leading "false" presumably keeps PCP rewrite
 * disabled -- confirm against mlxsw_reg_qrwe_pack()'s signature.
 */
static int
mlxsw_sp_port_dcb_app_update_qrwe(struct mlxsw_sp_port *mlxsw_sp_port,
				  bool rewrite_dscp)
{
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
	char qrwe_pl[MLXSW_REG_QRWE_LEN];

	mlxsw_reg_qrwe_pack(qrwe_pl, mlxsw_sp_port->local_port,
			    false, rewrite_dscp);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qrwe), qrwe_pl);
}
/* Switch the port's trust state, keeping DSCP rewrite in sync: rewrite
 * is enabled exactly when trusting L3 (DSCP).  A no-op when the port is
 * already in the requested state.  On failure of the rewrite update, the
 * trust state is restored to its previous value.
 */
static int
mlxsw_sp_port_dcb_toggle_trust(struct mlxsw_sp_port *mlxsw_sp_port,
			       enum mlxsw_reg_qpts_trust_state ts)
{
	int err;

	if (mlxsw_sp_port->dcb.trust_state == ts)
		return 0;

	err = mlxsw_sp_port_dcb_app_update_qpts(mlxsw_sp_port, ts);
	if (err)
		return err;

	err = mlxsw_sp_port_dcb_app_update_qrwe(mlxsw_sp_port,
						ts == MLXSW_REG_QPTS_TRUST_STATE_DSCP);
	if (err)
		goto err_update_qrwe;

	mlxsw_sp_port->dcb.trust_state = ts;
	return 0;

err_update_qrwe:
	mlxsw_sp_port_dcb_app_update_qpts(mlxsw_sp_port,
					  mlxsw_sp_port->dcb.trust_state);
	return err;
}
/* Program the port's default switch priority via the QPDP register. */
static int
mlxsw_sp_port_dcb_app_update_qpdp(struct mlxsw_sp_port *mlxsw_sp_port,
				  u8 default_prio)
{
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
	char qpdp_pl[MLXSW_REG_QPDP_LEN];

	mlxsw_reg_qpdp_pack(qpdp_pl, mlxsw_sp_port->local_port, default_prio);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpdp), qpdp_pl);
}
/* Program the per-port DSCP-to-priority map via the QPDPM register. */
static int
mlxsw_sp_port_dcb_app_update_qpdpm(struct mlxsw_sp_port *mlxsw_sp_port,
				   struct dcb_ieee_app_dscp_map *map)
{
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
	char qpdpm_pl[MLXSW_REG_QPDPM_LEN];
	int dscp;

	mlxsw_reg_qpdpm_pack(qpdpm_pl, mlxsw_sp_port->local_port);
	for (dscp = 0; dscp < ARRAY_SIZE(map->map); dscp++)
		mlxsw_reg_qpdpm_dscp_pack(qpdpm_pl, dscp, map->map[dscp]);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpdpm), qpdpm_pl);
}
/* Program the per-port priority-to-DSCP rewrite map via the QPDSM
 * register.
 */
static int
mlxsw_sp_port_dcb_app_update_qpdsm(struct mlxsw_sp_port *mlxsw_sp_port,
				   struct dcb_ieee_app_prio_map *map)
{
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
	char qpdsm_pl[MLXSW_REG_QPDSM_LEN];
	int prio;

	mlxsw_reg_qpdsm_pack(qpdsm_pl, mlxsw_sp_port->local_port);
	for (prio = 0; prio < ARRAY_SIZE(map->map); prio++)
		mlxsw_reg_qpdsm_prio_pack(qpdsm_pl, prio, map->map[prio]);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpdsm), qpdsm_pl);
}
static int mlxsw_sp_port_dcb_app_update(struct mlxsw_sp_port *mlxsw_sp_port)
{
struct dcb_ieee_app_prio_map prio_map;
struct dcb_ieee_app_dscp_map dscp_map;
u8 default_prio;
bool have_dscp;
int err;
default_prio = mlxsw_sp_port_dcb_app_default_prio(mlxsw_sp_port);
err = mlxsw_sp_port_dcb_app_update_qpdp(mlxsw_sp_port, default_prio);
if (err) {
netdev_err(mlxsw_sp_port->dev, "Couldn't configure port default priority\n");
return err;
}
have_dscp = mlxsw_sp_port_dcb_app_prio_dscp_map(mlxsw_sp_port,
&prio_map);
mlxsw_sp_port_dcb_app_dscp_prio_map(mlxsw_sp_port, default_prio,
&dscp_map);
err = mlxsw_sp_port_dcb_app_update_qpdpm(mlxsw_sp_port,
&dscp_map);
if (err) {
netdev_err(mlxsw_sp_port->dev, "Couldn't configure priority map\n");
return err;
}
err = mlxsw_sp_port_dcb_app_update_qpdsm(mlxsw_sp_port,
&prio_map);
if (err) {
netdev_err(mlxsw_sp_port->dev, "Couldn't configure DSCP rewrite map\n");
return err;
}
mlxsw: spectrum_dcb: Configure DSCP map as the last rule is removed Spectrum systems use DSCP rewrite map to update DSCP field in egressing packets to correspond to priority that the packet has. Whether rewriting will take place is determined at the point when the packet ingresses the switch: if the port is in Trust L3 mode, packet priority is determined from the DSCP map at the port, and DSCP rewrite will happen. If the port is in Trust L2 mode, 802.1p is used for packet prioritization, and no DSCP rewrite will happen. The driver determines the port trust mode based on whether any DSCP prioritization rules are in effect at given port. If there are any, trust level is L3, otherwise it's L2. When the last DSCP rule is removed, the port is switched to trust L2. Under that scenario, if DSCP of a packet should be rewritten, it should be rewritten to 0. However, when switching to Trust L2, the driver neglects to also update the DSCP rewrite map. The last DSCP rule thus remains in effect, and packets egressing through this port, if they have the right priority, will have their DSCP set according to this rule. Fix by first configuring the rewrite map, and only then switching to trust L2 and bailing out. Fixes: b2b1dab6884e ("mlxsw: spectrum: Support ieee_setapp, ieee_delapp") Signed-off-by: Petr Machata <petrm@mellanox.com> Reported-by: Alex Veber <alexve@mellanox.com> Tested-by: Alex Veber <alexve@mellanox.com> Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-07-17 23:29:07 +03:00
if (!have_dscp) {
err = mlxsw_sp_port_dcb_toggle_trust(mlxsw_sp_port,
MLXSW_REG_QPTS_TRUST_STATE_PCP);
if (err)
netdev_err(mlxsw_sp_port->dev, "Couldn't switch to trust L2\n");
return err;
}
err = mlxsw_sp_port_dcb_toggle_trust(mlxsw_sp_port,
MLXSW_REG_QPTS_TRUST_STATE_DSCP);
if (err) {
/* A failure to set trust DSCP means that the QPDPM and QPDSM
* maps installed above are not in effect. And since we are here
* attempting to set trust DSCP, we couldn't have attempted to
* switch trust to PCP. Thus no cleanup is necessary.
*/
netdev_err(mlxsw_sp_port->dev, "Couldn't switch to trust L3\n");
return err;
}
return 0;
}
/* dcbnl ieee_setapp handler: validate the entry, add it to the kernel APP
 * table, and re-program the hardware.  If hardware programming fails, the
 * table entry is removed again so table and hardware stay in sync.
 */
static int mlxsw_sp_dcbnl_ieee_setapp(struct net_device *dev,
				      struct dcb_app *app)
{
	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
	int err;

	err = mlxsw_sp_dcbnl_app_validate(dev, app);
	if (err)
		return err;

	err = dcb_ieee_setapp(dev, app);
	if (err)
		return err;

	err = mlxsw_sp_port_dcb_app_update(mlxsw_sp_port);
	if (err) {
		/* Roll back the APP table entry we just added. */
		dcb_ieee_delapp(dev, app);
		return err;
	}

	return 0;
}
/* dcbnl ieee_delapp handler: remove the entry from the kernel APP table,
 * then re-program the hardware.  The hardware update is best-effort --
 * the entry is already gone from the table, so only a warning is logged
 * and success is returned.
 */
static int mlxsw_sp_dcbnl_ieee_delapp(struct net_device *dev,
				      struct dcb_app *app)
{
	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
	int err;

	err = dcb_ieee_delapp(dev, app);
	if (err)
		return err;

	if (mlxsw_sp_port_dcb_app_update(mlxsw_sp_port))
		netdev_err(dev, "Failed to update DCB APP configuration\n");
	return 0;
}
static int mlxsw_sp_dcbnl_ieee_getmaxrate(struct net_device *dev,
struct ieee_maxrate *maxrate)
{
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
memcpy(maxrate, mlxsw_sp_port->dcb.maxrate, sizeof(*maxrate));
return 0;
}
/* dcbnl ieee_setmaxrate handler: program the per-TC shaper on each
 * subgroup element; on failure, restore the previously cached rates on
 * the TCs already changed (best-effort) and return the original error.
 */
static int mlxsw_sp_dcbnl_ieee_setmaxrate(struct net_device *dev,
					  struct ieee_maxrate *maxrate)
{
	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
	struct ieee_maxrate *my_maxrate = mlxsw_sp_port->dcb.maxrate;
	int err, tc;

	for (tc = 0; tc < IEEE_8021QAZ_MAX_TCS; tc++) {
		err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
						    MLXSW_REG_QEEC_HR_SUBGROUP,
						    tc, 0,
						    maxrate->tc_maxrate[tc], 0);
		if (err) {
			netdev_err(dev, "Failed to set maxrate for TC %d\n", tc);
			goto err_port_ets_maxrate_set;
		}
	}

	/* Cache the configuration that is now in effect. */
	*my_maxrate = *maxrate;
	return 0;

err_port_ets_maxrate_set:
	while (--tc >= 0)
		mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
					      MLXSW_REG_QEEC_HR_SUBGROUP,
					      tc, 0,
					      my_maxrate->tc_maxrate[tc], 0);
	return err;
}
/* Refresh the cached PFC counters for priority @prio from the PPCNT
 * per-priority counter group: frames we sent (requests) and frames we
 * received (indications).
 */
static int mlxsw_sp_port_pfc_cnt_get(struct mlxsw_sp_port *mlxsw_sp_port,
				     u8 prio)
{
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
	struct ieee_pfc *my_pfc = mlxsw_sp_port->dcb.pfc;
	char ppcnt_pl[MLXSW_REG_PPCNT_LEN];
	int err;

	mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port,
			     MLXSW_REG_PPCNT_PRIO_CNT, prio);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ppcnt), ppcnt_pl);
	if (err)
		return err;

	my_pfc->requests[prio] = mlxsw_reg_ppcnt_tx_pause_get(ppcnt_pl);
	my_pfc->indications[prio] = mlxsw_reg_ppcnt_rx_pause_get(ppcnt_pl);
	return 0;
}
static int mlxsw_sp_dcbnl_ieee_getpfc(struct net_device *dev,
struct ieee_pfc *pfc)
{
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
int err, i;
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
err = mlxsw_sp_port_pfc_cnt_get(mlxsw_sp_port, i);
if (err) {
netdev_err(dev, "Failed to get PFC count for priority %d\n",
i);
return err;
}
}
memcpy(pfc, mlxsw_sp_port->dcb.pfc, sizeof(*pfc));
return 0;
}
/* Write the PFC configuration to the PFCC register: the per-priority PFC
 * enablement mask from @pfc, while preserving the port's current
 * link-level pause settings.
 */
static int mlxsw_sp_port_pfc_set(struct mlxsw_sp_port *mlxsw_sp_port,
				 struct ieee_pfc *pfc)
{
	char pfcc_pl[MLXSW_REG_PFCC_LEN];

	mlxsw_reg_pfcc_pack(pfcc_pl, mlxsw_sp_port->local_port);
	mlxsw_reg_pfcc_pprx_set(pfcc_pl, mlxsw_sp_port->link.rx_pause);
	mlxsw_reg_pfcc_pptx_set(pfcc_pl, mlxsw_sp_port->link.tx_pause);
	mlxsw_reg_pfcc_prio_pack(pfcc_pl, pfc->pfc_en);

	return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pfcc),
			       pfcc_pl);
}
static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev,
struct ieee_pfc *pfc)
{
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port);
mlxsw: spectrum: Unify delay handling between PFC and pause When a priority is marked as lossless using DCB PFC, or when pause frames are enabled on a port, mlxsw adds to port buffers an extra space to cover the traffic that will arrive between the time that a pause or PFC frame is emitted, and the time traffic actually stops. This is called the delay. The concept is the same in PFC and pause, however the way the extra buffer space is calculated differs. In this patch, unify this handling. Delay is to be measured in bytes of extra space, and will not include MTU. PFC handler sets the delay directly from the parameter it gets through the DCB interface. To convert pause handler, move MLXSW_SP_PAUSE_DELAY to ethtool module, convert to bytes, and reduce it by maximum MTU, and divide by two. Then it has the same meaning as the delay_bytes set by the PFC handler. Keep the delay_bytes value in struct mlxsw_sp_hdroom introduced in the previous patch. Change PFC and pause handlers to store the new delay value there and have __mlxsw_sp_port_headroom_set() take it from there. Instead of mlxsw_sp_pfc_delay_get() and mlxsw_sp_pg_buf_delay_get(), introduce mlxsw_sp_hdroom_buf_delay_get() to calculate the delay provision. Drop the unnecessary MLXSW_SP_CELL_FACTOR, and instead add an explanatory comment describing the formula used. Signed-off-by: Petr Machata <petrm@nvidia.com> Reviewed-by: Jiri Pirko <jiri@nvidia.com> Signed-off-by: Ido Schimmel <idosch@nvidia.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-09-16 09:35:15 +03:00
struct mlxsw_sp_hdroom orig_hdroom;
struct mlxsw_sp_hdroom hdroom;
int prio;
int err;
if (pause_en && pfc->pfc_en) {
netdev_err(dev, "PAUSE frames already enabled on port\n");
return -EINVAL;
}
mlxsw: spectrum: Unify delay handling between PFC and pause When a priority is marked as lossless using DCB PFC, or when pause frames are enabled on a port, mlxsw adds to port buffers an extra space to cover the traffic that will arrive between the time that a pause or PFC frame is emitted, and the time traffic actually stops. This is called the delay. The concept is the same in PFC and pause, however the way the extra buffer space is calculated differs. In this patch, unify this handling. Delay is to be measured in bytes of extra space, and will not include MTU. PFC handler sets the delay directly from the parameter it gets through the DCB interface. To convert pause handler, move MLXSW_SP_PAUSE_DELAY to ethtool module, convert to bytes, and reduce it by maximum MTU, and divide by two. Then it has the same meaning as the delay_bytes set by the PFC handler. Keep the delay_bytes value in struct mlxsw_sp_hdroom introduced in the previous patch. Change PFC and pause handlers to store the new delay value there and have __mlxsw_sp_port_headroom_set() take it from there. Instead of mlxsw_sp_pfc_delay_get() and mlxsw_sp_pg_buf_delay_get(), introduce mlxsw_sp_hdroom_buf_delay_get() to calculate the delay provision. Drop the unnecessary MLXSW_SP_CELL_FACTOR, and instead add an explanatory comment describing the formula used. Signed-off-by: Petr Machata <petrm@nvidia.com> Reviewed-by: Jiri Pirko <jiri@nvidia.com> Signed-off-by: Ido Schimmel <idosch@nvidia.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-09-16 09:35:15 +03:00
orig_hdroom = *mlxsw_sp_port->hdroom;
hdroom = orig_hdroom;
if (pfc->pfc_en)
hdroom.delay_bytes = DIV_ROUND_UP(pfc->delay, BITS_PER_BYTE);
else
hdroom.delay_bytes = 0;
for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++)
hdroom.prios.prio[prio].lossy = !(pfc->pfc_en & BIT(prio));
mlxsw_sp_hdroom_bufs_reset_lossiness(&hdroom);
mlxsw: spectrum: Split headroom autoresize out of buffer configuration Split mlxsw_sp_port_headroom_set() to three functions. mlxsw_sp_hdroom_bufs_reset_sizes() changes the sizes of the individual PG buffers, and mlxsw_sp_hdroom_configure_buffers() will actually apply the configuration. A third function, mlxsw_sp_hdroom_bufs_fit(), verifies that the requested buffer configuration matches total headroom size requirements. Add wrappers, mlxsw_sp_hdroom_configure() and __..., that will eventually perform full headroom configuration, but for now, only have them verify the configured headroom size, and invoke mlxsw_sp_hdroom_configure_buffers(). Have them take the `force` argument to prepare for a later patch, even though it is currently unused. Note that the loop in mlxsw_sp_hdroom_configure_buffers() only goes through DCBX_MAX_BUFFERS. Since there is no logic to configure the control buffer, it needs to keep the values queried from the FW. Eventually this function should configure all the PGs. Note that conversion of __mlxsw_sp_dcbnl_ieee_setets() is not trivial. That function performs the headroom configuration in three steps: first it resizes the buffers and adds any new ones. Then it redirects priorities to the new buffers. And finally it sets the size of the now-unused buffers to zero. This way no packet drops are introduced. So after invoking mlxsw_sp_hdroom_bufs_reset_sizes(), tweak the configuration to keep the old sizes of PG buffers for those buffers whose size was set to zero. Signed-off-by: Petr Machata <petrm@nvidia.com> Reviewed-by: Jiri Pirko <jiri@nvidia.com> Signed-off-by: Ido Schimmel <idosch@nvidia.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-09-16 09:35:20 +03:00
mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom);
mlxsw: spectrum: Split headroom autoresize out of buffer configuration Split mlxsw_sp_port_headroom_set() to three functions. mlxsw_sp_hdroom_bufs_reset_sizes() changes the sizes of the individual PG buffers, and mlxsw_sp_hdroom_configure_buffers() will actually apply the configuration. A third function, mlxsw_sp_hdroom_bufs_fit(), verifies that the requested buffer configuration matches total headroom size requirements. Add wrappers, mlxsw_sp_hdroom_configure() and __..., that will eventually perform full headroom configuration, but for now, only have them verify the configured headroom size, and invoke mlxsw_sp_hdroom_configure_buffers(). Have them take the `force` argument to prepare for a later patch, even though it is currently unused. Note that the loop in mlxsw_sp_hdroom_configure_buffers() only goes through DCBX_MAX_BUFFERS. Since there is no logic to configure the control buffer, it needs to keep the values queried from the FW. Eventually this function should configure all the PGs. Note that conversion of __mlxsw_sp_dcbnl_ieee_setets() is not trivial. That function performs the headroom configuration in three steps: first it resizes the buffers and adds any new ones. Then it redirects priorities to the new buffers. And finally it sets the size of the now-unused buffers to zero. This way no packet drops are introduced. So after invoking mlxsw_sp_hdroom_bufs_reset_sizes(), tweak the configuration to keep the old sizes of PG buffers for those buffers whose size was set to zero. Signed-off-by: Petr Machata <petrm@nvidia.com> Reviewed-by: Jiri Pirko <jiri@nvidia.com> Signed-off-by: Ido Schimmel <idosch@nvidia.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-09-16 09:35:20 +03:00
err = mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom);
if (err) {
netdev_err(dev, "Failed to configure port's headroom for PFC\n");
return err;
}
err = mlxsw_sp_port_pfc_set(mlxsw_sp_port, pfc);
if (err) {
netdev_err(dev, "Failed to configure PFC\n");
goto err_port_pfc_set;
}
memcpy(mlxsw_sp_port->dcb.pfc, pfc, sizeof(*pfc));
mlxsw_sp_port->dcb.pfc->pfc_cap = IEEE_8021QAZ_MAX_TCS;
return 0;
err_port_pfc_set:
mlxsw: spectrum: Split headroom autoresize out of buffer configuration Split mlxsw_sp_port_headroom_set() to three functions. mlxsw_sp_hdroom_bufs_reset_sizes() changes the sizes of the individual PG buffers, and mlxsw_sp_hdroom_configure_buffers() will actually apply the configuration. A third function, mlxsw_sp_hdroom_bufs_fit(), verifies that the requested buffer configuration matches total headroom size requirements. Add wrappers, mlxsw_sp_hdroom_configure() and __..., that will eventually perform full headroom configuration, but for now, only have them verify the configured headroom size, and invoke mlxsw_sp_hdroom_configure_buffers(). Have them take the `force` argument to prepare for a later patch, even though it is currently unused. Note that the loop in mlxsw_sp_hdroom_configure_buffers() only goes through DCBX_MAX_BUFFERS. Since there is no logic to configure the control buffer, it needs to keep the values queried from the FW. Eventually this function should configure all the PGs. Note that conversion of __mlxsw_sp_dcbnl_ieee_setets() is not trivial. That function performs the headroom configuration in three steps: first it resizes the buffers and adds any new ones. Then it redirects priorities to the new buffers. And finally it sets the size of the now-unused buffers to zero. This way no packet drops are introduced. So after invoking mlxsw_sp_hdroom_bufs_reset_sizes(), tweak the configuration to keep the old sizes of PG buffers for those buffers whose size was set to zero. Signed-off-by: Petr Machata <petrm@nvidia.com> Reviewed-by: Jiri Pirko <jiri@nvidia.com> Signed-off-by: Ido Schimmel <idosch@nvidia.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-09-16 09:35:20 +03:00
mlxsw_sp_hdroom_configure(mlxsw_sp_port, &orig_hdroom);
return err;
}
static int mlxsw_sp_dcbnl_getbuffer(struct net_device *dev, struct dcbnl_buffer *buf)
{
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
struct mlxsw_sp_hdroom *hdroom = mlxsw_sp_port->hdroom;
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
int prio;
int i;
buf->total_size = 0;
BUILD_BUG_ON(DCBX_MAX_BUFFERS > MLXSW_SP_PB_COUNT);
for (i = 0; i < MLXSW_SP_PB_COUNT; i++) {
u32 bytes = mlxsw_sp_cells_bytes(mlxsw_sp, hdroom->bufs.buf[i].size_cells);
if (i < DCBX_MAX_BUFFERS)
buf->buffer_size[i] = bytes;
buf->total_size += bytes;
}
buf->total_size += mlxsw_sp_cells_bytes(mlxsw_sp, hdroom->int_buf.size_cells);
for (prio = 0; prio < IEEE_8021Q_MAX_PRIORITIES; prio++)
buf->prio2buffer[prio] = hdroom->prios.prio[prio].buf_idx;
return 0;
}
/* dcbnl_setbuffer callback: take a user-supplied priority-to-buffer mapping
 * and per-buffer sizes, recompute the derived headroom configuration, and
 * apply it to the device.
 */
static int mlxsw_sp_dcbnl_setbuffer(struct net_device *dev, struct dcbnl_buffer *buf)
{
	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
	struct mlxsw_sp_hdroom hdroom;
	int i;

	hdroom = *mlxsw_sp_port->hdroom;

	/* Manual buffer management is only supported when egress is under TC
	 * control.
	 */
	if (hdroom.mode != MLXSW_SP_HDROOM_MODE_TC) {
		netdev_err(dev, "The use of dcbnl_setbuffer is only allowed if egress is configured using TC\n");
		return -EINVAL;
	}

	for (i = 0; i < IEEE_8021Q_MAX_PRIORITIES; i++)
		hdroom.prios.prio[i].set_buf_idx = buf->prio2buffer[i];

	BUILD_BUG_ON(DCBX_MAX_BUFFERS > MLXSW_SP_PB_COUNT);
	for (i = 0; i < DCBX_MAX_BUFFERS; i++)
		hdroom.bufs.buf[i].set_size_cells =
			mlxsw_sp_bytes_cells(mlxsw_sp, buf->buffer_size[i]);

	/* Recompute the chip-level mapping, lossiness and sizes from the
	 * requested configuration, then push the result to the device.
	 */
	mlxsw_sp_hdroom_prios_reset_buf_idx(&hdroom);
	mlxsw_sp_hdroom_bufs_reset_lossiness(&hdroom);
	mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom);

	return mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom);
}
/* IEEE-mode DCB netlink operations exposed by spectrum ports; installed on
 * the netdev in mlxsw_sp_port_dcb_init().
 */
static const struct dcbnl_rtnl_ops mlxsw_sp_dcbnl_ops = {
	.ieee_getets		= mlxsw_sp_dcbnl_ieee_getets,
	.ieee_setets		= mlxsw_sp_dcbnl_ieee_setets,
	.ieee_getmaxrate	= mlxsw_sp_dcbnl_ieee_getmaxrate,
	.ieee_setmaxrate	= mlxsw_sp_dcbnl_ieee_setmaxrate,
	.ieee_getpfc		= mlxsw_sp_dcbnl_ieee_getpfc,
	.ieee_setpfc		= mlxsw_sp_dcbnl_ieee_setpfc,
	.ieee_setapp		= mlxsw_sp_dcbnl_ieee_setapp,
	.ieee_delapp		= mlxsw_sp_dcbnl_ieee_delapp,

	.getdcbx		= mlxsw_sp_dcbnl_getdcbx,
	.setdcbx		= mlxsw_sp_dcbnl_setdcbx,

	.dcbnl_getbuffer	= mlxsw_sp_dcbnl_getbuffer,
	.dcbnl_setbuffer	= mlxsw_sp_dcbnl_setbuffer,
};
static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
{
mlxsw_sp_port->dcb.ets = kzalloc(sizeof(*mlxsw_sp_port->dcb.ets),
GFP_KERNEL);
if (!mlxsw_sp_port->dcb.ets)
return -ENOMEM;
mlxsw_sp_port->dcb.ets->ets_cap = IEEE_8021QAZ_MAX_TCS;
return 0;
}
/* Free the cached ETS configuration allocated by mlxsw_sp_port_ets_init(). */
static void mlxsw_sp_port_ets_fini(struct mlxsw_sp_port *mlxsw_sp_port)
{
	kfree(mlxsw_sp_port->dcb.ets);
}
static int mlxsw_sp_port_maxrate_init(struct mlxsw_sp_port *mlxsw_sp_port)
{
int i;
mlxsw_sp_port->dcb.maxrate = kmalloc(sizeof(*mlxsw_sp_port->dcb.maxrate),
GFP_KERNEL);
if (!mlxsw_sp_port->dcb.maxrate)
return -ENOMEM;
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
mlxsw_sp_port->dcb.maxrate->tc_maxrate[i] = MLXSW_REG_QEEC_MAS_DIS;
return 0;
}
/* Free the cached maxrate configuration allocated by
 * mlxsw_sp_port_maxrate_init().
 */
static void mlxsw_sp_port_maxrate_fini(struct mlxsw_sp_port *mlxsw_sp_port)
{
	kfree(mlxsw_sp_port->dcb.maxrate);
}
static int mlxsw_sp_port_pfc_init(struct mlxsw_sp_port *mlxsw_sp_port)
{
mlxsw_sp_port->dcb.pfc = kzalloc(sizeof(*mlxsw_sp_port->dcb.pfc),
GFP_KERNEL);
if (!mlxsw_sp_port->dcb.pfc)
return -ENOMEM;
mlxsw_sp_port->dcb.pfc->pfc_cap = IEEE_8021QAZ_MAX_TCS;
return 0;
}
/* Free the cached PFC configuration allocated by mlxsw_sp_port_pfc_init(). */
static void mlxsw_sp_port_pfc_fini(struct mlxsw_sp_port *mlxsw_sp_port)
{
	kfree(mlxsw_sp_port->dcb.pfc);
}
/* Initialize DCB state for a port: allocate the cached ETS, maxrate and PFC
 * configurations and install the dcbnl ops on the netdev. On failure, the
 * allocations made so far are unwound in reverse order.
 *
 * Returns 0 on success or a negative errno.
 */
int mlxsw_sp_port_dcb_init(struct mlxsw_sp_port *mlxsw_sp_port)
{
	int err;

	err = mlxsw_sp_port_ets_init(mlxsw_sp_port);
	if (err)
		return err;

	err = mlxsw_sp_port_maxrate_init(mlxsw_sp_port);
	if (err)
		goto err_maxrate_init;

	err = mlxsw_sp_port_pfc_init(mlxsw_sp_port);
	if (err)
		goto err_pfc_init;

	mlxsw_sp_port->dcb.trust_state = MLXSW_REG_QPTS_TRUST_STATE_PCP;
	mlxsw_sp_port->dev->dcbnl_ops = &mlxsw_sp_dcbnl_ops;

	return 0;

err_pfc_init:
	mlxsw_sp_port_maxrate_fini(mlxsw_sp_port);
err_maxrate_init:
	mlxsw_sp_port_ets_fini(mlxsw_sp_port);
	return err;
}
/* Tear down per-port DCB state; releases the cached configurations in the
 * reverse order of their allocation in mlxsw_sp_port_dcb_init().
 */
void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port)
{
	mlxsw_sp_port_pfc_fini(mlxsw_sp_port);
	mlxsw_sp_port_maxrate_fini(mlxsw_sp_port);
	mlxsw_sp_port_ets_fini(mlxsw_sp_port);
}