linux/include/net/switchdev.h
Vladimir Oltean 8ca07176ab net: switchdev: introduce a fanout helper for SWITCHDEV_FDB_{ADD,DEL}_TO_DEVICE
Currently DSA has an issue with FDB entries pointing towards the bridge
in the presence of br_fdb_replay() being called at port join and leave
time.

In particular, each bridge port will ask for a replay for the FDB
entries pointing towards the bridge when it joins, and for another
replay when it leaves.

This means that for example, a bridge with 4 switch ports will notify
DSA 4 times of the bridge MAC address.

But if the MAC address of the bridge changes during the normal runtime
of the system, the bridge notifies switchdev [ once ] of the deletion of
the old MAC address as a local FDB towards the bridge, and of the
insertion [ again once ] of the new MAC address as a local FDB.

This is a problem, because DSA keeps the old MAC address as a host FDB
entry with refcount 4 (4 ports asked for it using br_fdb_replay). So the
old MAC address will not be deleted. Additionally, the new MAC address
will only be installed with refcount 1, and when the first switch port
leaves the bridge (leaving 3 others as still members), it will delete
with it the new MAC address of the bridge from the local FDB entries
kept by DSA (because the br_fdb_replay call on deletion will bring the
entry's refcount from 1 to 0).

So the problem, really, is that the number of br_fdb_replay() calls does
not match the refcount with which a host FDB entry is offloaded to DSA
during normal runtime.

An elegant way to solve the problem would be to make the switchdev
notification emitted by br_fdb_change_mac_address() result in a host FDB
kept by DSA which has a refcount exactly equal to the number of ports
under that bridge. Then, no matter how many DSA ports join or leave that
bridge, the host FDB entry will always be deleted when there are exactly
zero remaining DSA switch ports members of the bridge.

To implement the proposed solution, we remember that the switchdev
objects and port attributes have some helpers provided by switchdev,
which can be optionally called by drivers:
switchdev_handle_port_obj_{add,del} and switchdev_handle_port_attr_set.
These helpers:
- fan out a switchdev object/attribute emitted for the bridge towards
  all the lower interfaces that pass the check_cb().
- fan out a switchdev object/attribute emitted for a bridge port that is
  a LAG towards all the lower interfaces that pass the check_cb().

In other words, this is the model we need for the FDB events too:
something that will keep an FDB entry emitted towards a physical port as
it is, but translate an FDB entry emitted towards the bridge into N FDB
entries, one per physical port.

Of course, there are many differences between fanning out a switchdev
object (VLAN) on 3 lower interfaces of a LAG and fanning out an FDB
entry on 3 lower interfaces of a LAG. Intuitively, an FDB entry towards
a LAG should be treated specially, because FDB entries are unicast, we
can't just install the same address towards 3 destinations. It is
imaginable that drivers might want to treat this case specifically, so
create some methods for this case and do not recurse into the LAG lower
ports, just the bridge ports.

DSA also listens for FDB entries on "foreign" interfaces, aka interfaces
bridged with us which are not part of our hardware domain: think an
Ethernet switch bridged with a Wi-Fi AP. For those addresses, DSA
installs host FDB entries. However, there we have the same problem
(those host FDB entries are installed with a refcount of only 1) and an
even bigger one which we did not have with FDB entries towards the
bridge:

br_fdb_replay() is currently not called for FDB entries on foreign
interfaces, just for the physical port and for the bridge itself.

So when DSA sniffs an address learned by the software bridge towards a
foreign interface like an e1000 port, and then that e1000 leaves the
bridge, DSA remains with the dangling host FDB address. That will be
fixed separately by replaying all FDB entries and not just the ones
towards the port and the bridge.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-20 07:04:27 -07:00

448 lines
13 KiB
C

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* include/net/switchdev.h - Switch device API
* Copyright (c) 2014-2015 Jiri Pirko <jiri@resnulli.us>
* Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
*/
#ifndef _LINUX_SWITCHDEV_H_
#define _LINUX_SWITCHDEV_H_
#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <linux/list.h>
#include <net/ip_fib.h>
/*
 * Single-bit switchdev flags.
 * NOTE(review): presumably OR-ed into the `flags` member of
 * struct switchdev_attr / struct switchdev_obj - confirm against callers.
 */
#define SWITCHDEV_F_NO_RECURSE BIT(0)
#define SWITCHDEV_F_SKIP_EOPNOTSUPP BIT(1)
#define SWITCHDEV_F_DEFER BIT(2)
/*
 * Identifies which attribute a struct switchdev_attr describes. The union
 * member carrying the value for an ID is annotated next to that member in
 * struct switchdev_attr.
 */
enum switchdev_attr_id {
SWITCHDEV_ATTR_ID_UNDEFINED,
SWITCHDEV_ATTR_ID_PORT_STP_STATE,
SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS,
SWITCHDEV_ATTR_ID_PORT_MROUTER,
SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING,
SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL,
SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED,
SWITCHDEV_ATTR_ID_BRIDGE_MROUTER,
SWITCHDEV_ATTR_ID_MRP_PORT_ROLE,
};
/*
 * Value/mask pair used as the PORT_BRIDGE_FLAGS payload of
 * struct switchdev_attr.
 * NOTE(review): presumably @mask selects which bits of @val are being
 * changed - confirm against the bridge code.
 */
struct switchdev_brport_flags {
unsigned long val;
unsigned long mask;
};
/*
 * One switchdev attribute. @id tells which member of union @u holds the
 * value; each member is annotated with the attribute ID it belongs to.
 */
struct switchdev_attr {
struct net_device *orig_dev;
enum switchdev_attr_id id;
u32 flags;
void *complete_priv;
/* NOTE(review): presumably a completion callback that receives complete_priv as @priv - confirm. */
void (*complete)(struct net_device *dev, int err, void *priv);
union {
u8 stp_state; /* PORT_STP_STATE */
struct switchdev_brport_flags brport_flags; /* PORT_BRIDGE_FLAGS */
bool mrouter; /* PORT_MROUTER */
clock_t ageing_time; /* BRIDGE_AGEING_TIME */
bool vlan_filtering; /* BRIDGE_VLAN_FILTERING */
u16 vlan_protocol; /* BRIDGE_VLAN_PROTOCOL */
bool mc_disabled; /* MC_DISABLED */
u8 mrp_port_role; /* MRP_PORT_ROLE */
} u;
};
/*
 * Identifies the concrete object type that embeds a struct switchdev_obj
 * (see the struct switchdev_obj_* definitions and their SWITCHDEV_OBJ_*()
 * conversion macros in this header).
 */
enum switchdev_obj_id {
SWITCHDEV_OBJ_ID_UNDEFINED,
SWITCHDEV_OBJ_ID_PORT_VLAN,
SWITCHDEV_OBJ_ID_PORT_MDB,
SWITCHDEV_OBJ_ID_HOST_MDB,
SWITCHDEV_OBJ_ID_MRP,
SWITCHDEV_OBJ_ID_RING_TEST_MRP,
SWITCHDEV_OBJ_ID_RING_ROLE_MRP,
SWITCHDEV_OBJ_ID_RING_STATE_MRP,
SWITCHDEV_OBJ_ID_IN_TEST_MRP,
SWITCHDEV_OBJ_ID_IN_ROLE_MRP,
SWITCHDEV_OBJ_ID_IN_STATE_MRP,
};
/*
 * Common header embedded (as member `obj`) in every concrete switchdev
 * object; @id identifies the concrete type.
 */
struct switchdev_obj {
struct list_head list;
struct net_device *orig_dev;
enum switchdev_obj_id id;
u32 flags;
void *complete_priv;
/* NOTE(review): presumably a completion callback that receives complete_priv as @priv - confirm. */
void (*complete)(struct net_device *dev, int err, void *priv);
};
/* SWITCHDEV_OBJ_ID_PORT_VLAN */
struct switchdev_obj_port_vlan {
struct switchdev_obj obj; /* common object header */
u16 flags;
u16 vid;
};
/* Convert a pointer to the embedded `obj` back to its containing port-VLAN object. */
#define SWITCHDEV_OBJ_PORT_VLAN(OBJ) \
container_of((OBJ), struct switchdev_obj_port_vlan, obj)
/* SWITCHDEV_OBJ_ID_PORT_MDB */
struct switchdev_obj_port_mdb {
struct switchdev_obj obj; /* common object header */
unsigned char addr[ETH_ALEN]; /* ETH_ALEN-byte address */
u16 vid;
};
/* Convert a pointer to the embedded `obj` back to its containing port-MDB object. */
#define SWITCHDEV_OBJ_PORT_MDB(OBJ) \
container_of((OBJ), struct switchdev_obj_port_mdb, obj)
/* SWITCHDEV_OBJ_ID_MRP */
struct switchdev_obj_mrp {
struct switchdev_obj obj; /* common object header */
/* NOTE(review): p_port/s_port presumably are the primary/secondary ring ports - confirm. */
struct net_device *p_port;
struct net_device *s_port;
u32 ring_id;
u16 prio;
};
/* Convert a pointer to the embedded `obj` back to its containing MRP object. */
#define SWITCHDEV_OBJ_MRP(OBJ) \
container_of((OBJ), struct switchdev_obj_mrp, obj)
/* SWITCHDEV_OBJ_ID_RING_TEST_MRP */
struct switchdev_obj_ring_test_mrp {
struct switchdev_obj obj; /* common object header */
/* The interval is in us; a value of 0 means stop */
u32 interval;
u8 max_miss;
u32 ring_id;
u32 period;
bool monitor;
};
/* Convert a pointer to the embedded `obj` back to its containing ring-test object. */
#define SWITCHDEV_OBJ_RING_TEST_MRP(OBJ) \
container_of((OBJ), struct switchdev_obj_ring_test_mrp, obj)
/* SWITCHDEV_OBJ_ID_RING_ROLE_MRP */
struct switchdev_obj_ring_role_mrp {
struct switchdev_obj obj; /* common object header */
u8 ring_role;
u32 ring_id;
u8 sw_backup;
};
/* Convert a pointer to the embedded `obj` back to its containing ring-role object. */
#define SWITCHDEV_OBJ_RING_ROLE_MRP(OBJ) \
container_of((OBJ), struct switchdev_obj_ring_role_mrp, obj)
/* SWITCHDEV_OBJ_ID_RING_STATE_MRP */
struct switchdev_obj_ring_state_mrp {
struct switchdev_obj obj; /* common object header */
u8 ring_state;
u32 ring_id;
};
/* Convert a pointer to the embedded `obj` back to its containing ring-state object. */
#define SWITCHDEV_OBJ_RING_STATE_MRP(OBJ) \
container_of((OBJ), struct switchdev_obj_ring_state_mrp, obj)
/* SWITCHDEV_OBJ_ID_IN_TEST_MRP */
struct switchdev_obj_in_test_mrp {
struct switchdev_obj obj; /* common object header */
/* The interval is in us; a value of 0 means stop */
u32 interval;
u32 in_id;
u32 period;
u8 max_miss;
};
/* Convert a pointer to the embedded `obj` back to its containing in-test object. */
#define SWITCHDEV_OBJ_IN_TEST_MRP(OBJ) \
container_of((OBJ), struct switchdev_obj_in_test_mrp, obj)
/* SWITCHDEV_OBJ_ID_IN_ROLE_MRP */
struct switchdev_obj_in_role_mrp {
struct switchdev_obj obj; /* common object header */
struct net_device *i_port;
u32 ring_id;
u16 in_id;
u8 in_role;
u8 sw_backup;
};
/* Convert a pointer to the embedded `obj` back to its containing in-role object. */
#define SWITCHDEV_OBJ_IN_ROLE_MRP(OBJ) \
container_of((OBJ), struct switchdev_obj_in_role_mrp, obj)
/* SWITCHDEV_OBJ_ID_IN_STATE_MRP */
struct switchdev_obj_in_state_mrp {
struct switchdev_obj obj; /* common object header */
u32 in_id;
u8 in_state;
};
/* Convert a pointer to the embedded `obj` back to its containing in-state object. */
#define SWITCHDEV_OBJ_IN_STATE_MRP(OBJ) \
container_of((OBJ), struct switchdev_obj_in_state_mrp, obj)
/*
 * Function type for a callback that takes a switchdev object and returns an
 * int. Not referenced anywhere else in this header.
 */
typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj);
/*
 * Event codes passed as `val` to the switchdev notifier chains. Numbering
 * starts at 1. Events marked "Blocking" may sleep in their handlers per the
 * annotations below.
 */
enum switchdev_notifier_type {
SWITCHDEV_FDB_ADD_TO_BRIDGE = 1,
SWITCHDEV_FDB_DEL_TO_BRIDGE,
SWITCHDEV_FDB_ADD_TO_DEVICE,
SWITCHDEV_FDB_DEL_TO_DEVICE,
SWITCHDEV_FDB_OFFLOADED,
SWITCHDEV_FDB_FLUSH_TO_BRIDGE,
SWITCHDEV_PORT_OBJ_ADD, /* Blocking. */
SWITCHDEV_PORT_OBJ_DEL, /* Blocking. */
SWITCHDEV_PORT_ATTR_SET, /* May be blocking. */
SWITCHDEV_VXLAN_FDB_ADD_TO_BRIDGE,
SWITCHDEV_VXLAN_FDB_DEL_TO_BRIDGE,
SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE,
SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE,
SWITCHDEV_VXLAN_FDB_OFFLOADED,
};
/*
 * Common header of every switchdev notification. The more specific
 * switchdev_notifier_*_info structures embed it as their first member.
 */
struct switchdev_notifier_info {
struct net_device *dev; /* returned by switchdev_notifier_info_to_dev() */
struct netlink_ext_ack *extack; /* returned by switchdev_notifier_info_to_extack() */
const void *ctx; /* NOTE(review): opaque context; presumably what the handler callbacks receive as @ctx - confirm */
};
/*
 * Notification payload describing one FDB entry: an address, a vid and
 * three one-bit flags.
 */
struct switchdev_notifier_fdb_info {
struct switchdev_notifier_info info; /* must be first */
const unsigned char *addr;
u16 vid;
/* added_by_user and is_local are both clear for dynamically learned entries */
u8 added_by_user:1,
is_local:1,
offloaded:1;
};
/* Notification payload carrying a switchdev object. */
struct switchdev_notifier_port_obj_info {
struct switchdev_notifier_info info; /* must be first */
const struct switchdev_obj *obj;
bool handled; /* NOTE(review): presumably set by a handler that processed the object - confirm */
};
/* Notification payload carrying a switchdev attribute. */
struct switchdev_notifier_port_attr_info {
struct switchdev_notifier_info info; /* must be first */
const struct switchdev_attr *attr;
bool handled; /* NOTE(review): presumably set by a handler that processed the attribute - confirm */
};
/* Return the net_device stored in the notification header @info. */
static inline struct net_device *
switchdev_notifier_info_to_dev(const struct switchdev_notifier_info *info)
{
return info->dev;
}
/* Return the extended-ack pointer stored in the notification header @info. */
static inline struct netlink_ext_ack *
switchdev_notifier_info_to_extack(const struct switchdev_notifier_info *info)
{
return info->extack;
}
/*
 * Tell whether @fdb_info describes a dynamically learned FDB entry, i.e. one
 * that is neither flagged as added by the user nor flagged as local.
 */
static inline bool
switchdev_fdb_is_dynamically_learned(const struct switchdev_notifier_fdb_info *fdb_info)
{
	/* Either flag being set rules out dynamic learning. */
	if (fdb_info->added_by_user || fdb_info->is_local)
		return false;

	return true;
}
#ifdef CONFIG_NET_SWITCHDEV
/*
 * Core switchdev entry points, declared here when CONFIG_NET_SWITCHDEV is
 * enabled; the #else branch of this header provides inline stubs instead.
 */
/* NOTE(review): presumably processes operations queued with SWITCHDEV_F_DEFER - confirm in the implementation. */
void switchdev_deferred_process(void);
/* Set attribute @attr on @dev; returns an int status (the stub returns -EOPNOTSUPP). */
int switchdev_port_attr_set(struct net_device *dev,
const struct switchdev_attr *attr,
struct netlink_ext_ack *extack);
/* Add object @obj on @dev; returns an int status (the stub returns -EOPNOTSUPP). */
int switchdev_port_obj_add(struct net_device *dev,
const struct switchdev_obj *obj,
struct netlink_ext_ack *extack);
/* Delete object @obj on @dev; takes no extack. Returns an int status (the stub returns -EOPNOTSUPP). */
int switchdev_port_obj_del(struct net_device *dev,
const struct switchdev_obj *obj);
/*
 * Notifier chain API. There are two chains: a plain one and a "blocking"
 * one; enum switchdev_notifier_type marks which events are blocking.
 * @val is an event code, @info the notification header for that event.
 */
int register_switchdev_notifier(struct notifier_block *nb);
int unregister_switchdev_notifier(struct notifier_block *nb);
int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
struct switchdev_notifier_info *info,
struct netlink_ext_ack *extack);
/* Same trio for the blocking chain. */
int register_switchdev_blocking_notifier(struct notifier_block *nb);
int unregister_switchdev_blocking_notifier(struct notifier_block *nb);
int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev,
struct switchdev_notifier_info *info,
struct netlink_ext_ack *extack);
/*
 * NOTE(review): presumably updates the forwarding mark of @dev when it joins
 * (@joining true) or leaves @group_dev - confirm in the implementation. No
 * !CONFIG_NET_SWITCHDEV stub for this function exists in this header.
 */
void switchdev_port_fwd_mark_set(struct net_device *dev,
struct net_device *group_dev,
bool joining);
/*
 * Fan-out helpers for SWITCHDEV_FDB_{ADD,DEL}_TO_DEVICE events.
 *
 * As described when they were introduced: an FDB entry emitted towards a
 * physical port is kept as it is, while an FDB entry emitted towards the
 * bridge is translated into one entry per physical port, so that a driver
 * sees it once for each of its ports under that bridge. FDB entries towards
 * a LAG are not fanned out to the LAG's lower ports; the separate
 * lag_add_cb/lag_del_cb methods exist for that case.
 *
 * @check_cb: tells whether a net_device is one the caller handles.
 * @foreign_dev_check_cb: NOTE(review): presumably tells whether @foreign_dev
 *	is foreign to @dev (not part of the same hardware domain) - confirm.
 * @add_cb/@del_cb: invoked per port with the original device, a context
 *	pointer and the FDB info.
 */
int switchdev_handle_fdb_add_to_device(struct net_device *dev,
const struct switchdev_notifier_fdb_info *fdb_info,
bool (*check_cb)(const struct net_device *dev),
bool (*foreign_dev_check_cb)(const struct net_device *dev,
const struct net_device *foreign_dev),
int (*add_cb)(struct net_device *dev,
const struct net_device *orig_dev, const void *ctx,
const struct switchdev_notifier_fdb_info *fdb_info),
int (*lag_add_cb)(struct net_device *dev,
const struct net_device *orig_dev, const void *ctx,
const struct switchdev_notifier_fdb_info *fdb_info));
int switchdev_handle_fdb_del_to_device(struct net_device *dev,
const struct switchdev_notifier_fdb_info *fdb_info,
bool (*check_cb)(const struct net_device *dev),
bool (*foreign_dev_check_cb)(const struct net_device *dev,
const struct net_device *foreign_dev),
int (*del_cb)(struct net_device *dev,
const struct net_device *orig_dev, const void *ctx,
const struct switchdev_notifier_fdb_info *fdb_info),
int (*lag_del_cb)(struct net_device *dev,
const struct net_device *orig_dev, const void *ctx,
const struct switchdev_notifier_fdb_info *fdb_info));
/*
 * Fan-out helpers for port object and port attribute events, optionally
 * called by drivers. They fan out an object/attribute emitted for the
 * bridge, or for a bridge port that is a LAG, towards all the lower
 * interfaces that pass @check_cb, invoking the add/del/set callback on them.
 */
int switchdev_handle_port_obj_add(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
int (*add_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_obj *obj,
struct netlink_ext_ack *extack));
/* Deletion counterpart; note that del_cb receives no extack. */
int switchdev_handle_port_obj_del(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
int (*del_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_obj *obj));
/* Attribute counterpart of the object helpers. */
int switchdev_handle_port_attr_set(struct net_device *dev,
struct switchdev_notifier_port_attr_info *port_attr_info,
bool (*check_cb)(const struct net_device *dev),
int (*set_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_attr *attr,
struct netlink_ext_ack *extack));
#else
/* Stub for builds without CONFIG_NET_SWITCHDEV: does nothing. */
static inline void switchdev_deferred_process(void)
{
}
/* Stub for builds without CONFIG_NET_SWITCHDEV: always fails with -EOPNOTSUPP. */
static inline int switchdev_port_attr_set(struct net_device *dev,
const struct switchdev_attr *attr,
struct netlink_ext_ack *extack)
{
return -EOPNOTSUPP;
}
/* Stub for builds without CONFIG_NET_SWITCHDEV: always fails with -EOPNOTSUPP. */
static inline int switchdev_port_obj_add(struct net_device *dev,
const struct switchdev_obj *obj,
struct netlink_ext_ack *extack)
{
return -EOPNOTSUPP;
}
/* Stub for builds without CONFIG_NET_SWITCHDEV: always fails with -EOPNOTSUPP. */
static inline int switchdev_port_obj_del(struct net_device *dev,
const struct switchdev_obj *obj)
{
return -EOPNOTSUPP;
}
/* Stub for builds without CONFIG_NET_SWITCHDEV: ignores @nb and returns 0. */
static inline int register_switchdev_notifier(struct notifier_block *nb)
{
return 0;
}
/* Stub for builds without CONFIG_NET_SWITCHDEV: ignores @nb and returns 0. */
static inline int unregister_switchdev_notifier(struct notifier_block *nb)
{
return 0;
}
/* Stub for builds without CONFIG_NET_SWITCHDEV: calls nothing and returns NOTIFY_DONE. */
static inline int call_switchdev_notifiers(unsigned long val,
struct net_device *dev,
struct switchdev_notifier_info *info,
struct netlink_ext_ack *extack)
{
return NOTIFY_DONE;
}
/* Stub for builds without CONFIG_NET_SWITCHDEV: ignores @nb and returns 0. */
static inline int
register_switchdev_blocking_notifier(struct notifier_block *nb)
{
return 0;
}
/* Stub for builds without CONFIG_NET_SWITCHDEV: ignores @nb and returns 0. */
static inline int
unregister_switchdev_blocking_notifier(struct notifier_block *nb)
{
return 0;
}
/* Stub for builds without CONFIG_NET_SWITCHDEV: calls nothing and returns NOTIFY_DONE. */
static inline int
call_switchdev_blocking_notifiers(unsigned long val,
struct net_device *dev,
struct switchdev_notifier_info *info,
struct netlink_ext_ack *extack)
{
return NOTIFY_DONE;
}
/*
 * Stub for builds without CONFIG_NET_SWITCHDEV: invokes none of the
 * callbacks and returns 0.
 */
static inline int
switchdev_handle_fdb_add_to_device(struct net_device *dev,
const struct switchdev_notifier_fdb_info *fdb_info,
bool (*check_cb)(const struct net_device *dev),
bool (*foreign_dev_check_cb)(const struct net_device *dev,
const struct net_device *foreign_dev),
int (*add_cb)(struct net_device *dev,
const struct net_device *orig_dev, const void *ctx,
const struct switchdev_notifier_fdb_info *fdb_info),
int (*lag_add_cb)(struct net_device *dev,
const struct net_device *orig_dev, const void *ctx,
const struct switchdev_notifier_fdb_info *fdb_info))
{
return 0;
}
/*
 * Stub for builds without CONFIG_NET_SWITCHDEV: invokes none of the
 * callbacks and returns 0.
 *
 * There must be no ';' after the parameter list: with one, this becomes a
 * prototype followed by a stray compound statement at file scope, and every
 * file including this header fails to build when switchdev is disabled.
 */
static inline int
switchdev_handle_fdb_del_to_device(struct net_device *dev,
		const struct switchdev_notifier_fdb_info *fdb_info,
		bool (*check_cb)(const struct net_device *dev),
		bool (*foreign_dev_check_cb)(const struct net_device *dev,
					     const struct net_device *foreign_dev),
		int (*del_cb)(struct net_device *dev,
			      const struct net_device *orig_dev, const void *ctx,
			      const struct switchdev_notifier_fdb_info *fdb_info),
		int (*lag_del_cb)(struct net_device *dev,
				  const struct net_device *orig_dev, const void *ctx,
				  const struct switchdev_notifier_fdb_info *fdb_info))
{
	return 0;
}
/*
 * Stub for builds without CONFIG_NET_SWITCHDEV: invokes none of the
 * callbacks and returns 0.
 */
static inline int
switchdev_handle_port_obj_add(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
int (*add_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_obj *obj,
struct netlink_ext_ack *extack))
{
return 0;
}
/*
 * Stub for builds without CONFIG_NET_SWITCHDEV: invokes none of the
 * callbacks and returns 0.
 */
static inline int
switchdev_handle_port_obj_del(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
int (*del_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_obj *obj))
{
return 0;
}
/*
 * Stub for builds without CONFIG_NET_SWITCHDEV: invokes none of the
 * callbacks and returns 0.
 */
static inline int
switchdev_handle_port_attr_set(struct net_device *dev,
struct switchdev_notifier_port_attr_info *port_attr_info,
bool (*check_cb)(const struct net_device *dev),
int (*set_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_attr *attr,
struct netlink_ext_ack *extack))
{
return 0;
}
#endif
#endif /* _LINUX_SWITCHDEV_H_ */