linux/net/dsa/dsa_priv.h
Vladimir Oltean bff33f7e2a net: dsa: implement auto-normalization of MTU for bridge hardware datapath
Many switches don't have an explicit knob for configuring the MTU
(maximum transmission unit per interface).  Instead, they do the
length-based packet admission checks on the ingress interface, for
reasons that are easy to understand (why would you accept a packet in
the queuing subsystem if you know you're going to drop it anyway).

So it is actually the MRU that these switches permit configuring.

In Linux there only exists the IFLA_MTU netlink attribute and the
associated dev_set_mtu function. The comments like to play blind and say
that it's changing the "maximum transfer unit", which is to say that
there isn't any directionality in the meaning of the MTU word. So that
is the interpretation that this patch is giving to things: MTU == MRU.

When 2 interfaces having different MTUs are bridged, the bridge driver
MTU auto-adjustment logic kicks in: what br_mtu_auto_adjust() does is it
adjusts the MTU of the bridge net device itself (and not that of the
slave net devices) to the minimum value of all slave interfaces, in
order for forwarded packets to not exceed the MTU regardless of the
interface they are received and send on.

The idea behind this behavior, and why the slave MTUs are not adjusted,
is that normal termination from Linux over the L2 forwarding domain
should happen over the bridge net device, which _is_ properly limited by
the minimum MTU. And termination over individual slave devices is
possible even if those are bridged. But that is not "forwarding", so
there's no reason to do normalization there, since only a single
interface sees that packet.

The problem with those switches that can only control the MRU is with
the offloaded data path, where a packet received on an interface with
MRU 9000 would still be forwarded to an interface with MRU 1500. And the
br_mtu_auto_adjust() function does not really help, since the MTU
configured on the bridge net device is ignored.

In order to enforce the de-facto MTU == MRU rule for these switches, we
need to do MTU normalization, which means: in order for no packet larger
than the MTU configured on this port to be sent, then we need to limit
the MRU on all ports that this packet could possibly come from. AKA
since we are configuring the MRU via MTU, it means that all ports within
a bridge forwarding domain should have the same MTU.

And that is exactly what this patch is trying to do.

>From an implementation perspective, we try to follow the intent of the
user, otherwise there is a risk that we might livelock them (they try to
change the MTU on an already-bridged interface, but we just keep
changing it back in an attempt to keep the MTU normalized). So the MTU
that the bridge is normalized to is either:

 - The most recently changed one:

   ip link set dev swp0 master br0
   ip link set dev swp1 master br0
   ip link set dev swp0 mtu 1400

   This sequence will make swp1 inherit MTU 1400 from swp0.

 - The one of the most recently added interface to the bridge:

   ip link set dev swp0 master br0
   ip link set dev swp1 mtu 1400
   ip link set dev swp1 master br0

   The above sequence will make swp0 inherit MTU 1400 as well.

Suggested-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-03-27 16:07:25 -07:00

202 lines
5.9 KiB
C

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* net/dsa/dsa_priv.h - Hardware switch handling
* Copyright (c) 2008-2009 Marvell Semiconductor
*/
#ifndef __DSA_PRIV_H
#define __DSA_PRIV_H
#include <linux/phy.h>
#include <linux/netdevice.h>
#include <linux/netpoll.h>
#include <net/dsa.h>
enum {
DSA_NOTIFIER_AGEING_TIME,
DSA_NOTIFIER_BRIDGE_JOIN,
DSA_NOTIFIER_BRIDGE_LEAVE,
DSA_NOTIFIER_FDB_ADD,
DSA_NOTIFIER_FDB_DEL,
DSA_NOTIFIER_MDB_ADD,
DSA_NOTIFIER_MDB_DEL,
DSA_NOTIFIER_VLAN_ADD,
DSA_NOTIFIER_VLAN_DEL,
DSA_NOTIFIER_MTU,
};
/* DSA_NOTIFIER_AGEING_TIME */
struct dsa_notifier_ageing_time_info {
struct switchdev_trans *trans;
unsigned int ageing_time;
};
/* DSA_NOTIFIER_BRIDGE_* */
struct dsa_notifier_bridge_info {
struct net_device *br;
int sw_index;
int port;
};
/* DSA_NOTIFIER_FDB_* */
struct dsa_notifier_fdb_info {
int sw_index;
int port;
const unsigned char *addr;
u16 vid;
};
/* DSA_NOTIFIER_MDB_* */
struct dsa_notifier_mdb_info {
const struct switchdev_obj_port_mdb *mdb;
struct switchdev_trans *trans;
int sw_index;
int port;
};
/* DSA_NOTIFIER_VLAN_* */
struct dsa_notifier_vlan_info {
const struct switchdev_obj_port_vlan *vlan;
struct switchdev_trans *trans;
int sw_index;
int port;
};
/* DSA_NOTIFIER_MTU */
struct dsa_notifier_mtu_info {
bool propagate_upstream;
int sw_index;
int port;
int mtu;
};
struct dsa_slave_priv {
/* Copy of CPU port xmit for faster access in slave transmit hot path */
struct sk_buff * (*xmit)(struct sk_buff *skb,
struct net_device *dev);
struct pcpu_sw_netstats *stats64;
/* DSA port data, such as switch, port index, etc. */
struct dsa_port *dp;
#ifdef CONFIG_NET_POLL_CONTROLLER
struct netpoll *netpoll;
#endif
/* TC context */
struct list_head mall_tc_list;
};
/* dsa.c */
const struct dsa_device_ops *dsa_tag_driver_get(int tag_protocol);
void dsa_tag_driver_put(const struct dsa_device_ops *ops);
bool dsa_schedule_work(struct work_struct *work);
const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops);
int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr, u16 vid,
u16 flags,
struct netlink_ext_ack *extack);
int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr, u16 vid);
/* master.c */
int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp);
void dsa_master_teardown(struct net_device *dev);
static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
int device, int port)
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
struct dsa_switch_tree *dst = cpu_dp->dst;
struct dsa_port *dp;
list_for_each_entry(dp, &dst->ports, list)
if (dp->ds->index == device && dp->index == port &&
dp->type == DSA_PORT_TYPE_USER)
return dp->slave;
return NULL;
}
/* port.c */
int dsa_port_set_state(struct dsa_port *dp, u8 state,
struct switchdev_trans *trans);
int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy);
int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
void dsa_port_disable_rt(struct dsa_port *dp);
void dsa_port_disable(struct dsa_port *dp);
int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br);
void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
struct switchdev_trans *trans);
int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
struct switchdev_trans *trans);
int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu,
bool propagate_upstream);
int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
u16 vid);
int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
u16 vid);
int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data);
int dsa_port_mdb_add(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb,
struct switchdev_trans *trans);
int dsa_port_mdb_del(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb);
int dsa_port_pre_bridge_flags(const struct dsa_port *dp, unsigned long flags,
struct switchdev_trans *trans);
int dsa_port_bridge_flags(const struct dsa_port *dp, unsigned long flags,
struct switchdev_trans *trans);
int dsa_port_mrouter(struct dsa_port *dp, bool mrouter,
struct switchdev_trans *trans);
int dsa_port_vlan_add(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan,
struct switchdev_trans *trans);
int dsa_port_vlan_del(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan);
int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags);
int dsa_port_vid_del(struct dsa_port *dp, u16 vid);
int dsa_port_link_register_of(struct dsa_port *dp);
void dsa_port_link_unregister_of(struct dsa_port *dp);
extern const struct phylink_mac_ops dsa_port_phylink_mac_ops;
/* slave.c */
extern const struct dsa_device_ops notag_netdev_ops;
void dsa_slave_mii_bus_init(struct dsa_switch *ds);
int dsa_slave_create(struct dsa_port *dp);
void dsa_slave_destroy(struct net_device *slave_dev);
bool dsa_slave_dev_check(const struct net_device *dev);
int dsa_slave_suspend(struct net_device *slave_dev);
int dsa_slave_resume(struct net_device *slave_dev);
int dsa_slave_register_notifier(void);
void dsa_slave_unregister_notifier(void);
static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
return p->dp;
}
static inline struct net_device *
dsa_slave_to_master(const struct net_device *dev)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
return dp->cpu_dp->master;
}
/* switch.c */
int dsa_switch_register_notifier(struct dsa_switch *ds);
void dsa_switch_unregister_notifier(struct dsa_switch *ds);
/* dsa2.c */
extern struct list_head dsa_tree_list;
#endif