linux/net/bridge/br_fdb.c

833 lines
20 KiB
C
Raw Normal View History

/*
* Forwarding database
* Linux ethernet bridge
*
* Authors:
* Lennert Buytenhek <buytenh@gnu.org>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/rculist.h>
#include <linux/spinlock.h>
#include <linux/times.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/jhash.h>
#include <linux/random.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 08:04:11 +00:00
#include <linux/slab.h>
#include <linux/atomic.h>
#include <asm/unaligned.h>
#include <linux/if_vlan.h>
#include "br_private.h"
static struct kmem_cache *br_fdb_cache __read_mostly;
static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid);
static void fdb_notify(struct net_bridge *br,
const struct net_bridge_fdb_entry *, int);
static u32 fdb_salt __read_mostly;
int __init br_fdb_init(void)
{
br_fdb_cache = kmem_cache_create("bridge_fdb_cache",
sizeof(struct net_bridge_fdb_entry),
0,
SLAB_HWCACHE_ALIGN, NULL);
if (!br_fdb_cache)
return -ENOMEM;
get_random_bytes(&fdb_salt, sizeof(fdb_salt));
return 0;
}
void br_fdb_fini(void)
{
kmem_cache_destroy(br_fdb_cache);
}
/* if topology_changing then use forward_delay (default 15 sec)
* otherwise keep longer (default 5 minutes)
*/
static inline unsigned long hold_time(const struct net_bridge *br)
{
return br->topology_change ? br->forward_delay : br->ageing_time;
}
static inline int has_expired(const struct net_bridge *br,
const struct net_bridge_fdb_entry *fdb)
{
return !fdb->is_static &&
time_before_eq(fdb->updated + hold_time(br), jiffies);
}
static inline int br_mac_hash(const unsigned char *mac, __u16 vid)
{
/* use 1 byte of OUI and 3 bytes of NIC */
u32 key = get_unaligned((u32 *)(mac + 2));
return jhash_2words(key, vid, fdb_salt) & (BR_HASH_SIZE - 1);
}
static void fdb_rcu_free(struct rcu_head *head)
{
struct net_bridge_fdb_entry *ent
= container_of(head, struct net_bridge_fdb_entry, rcu);
kmem_cache_free(br_fdb_cache, ent);
}
static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
{
hlist_del_rcu(&f->hlist);
fdb_notify(br, f, RTM_DELNEIGH);
call_rcu(&f->rcu, fdb_rcu_free);
}
void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
{
struct net_bridge *br = p->br;
bool no_vlan = (nbp_get_vlan_info(p) == NULL) ? true : false;
int i;
spin_lock_bh(&br->hash_lock);
/* Search all chains since old address/hash is unknown */
for (i = 0; i < BR_HASH_SIZE; i++) {
struct hlist_node *h;
hlist_for_each(h, &br->hash[i]) {
struct net_bridge_fdb_entry *f;
f = hlist_entry(h, struct net_bridge_fdb_entry, hlist);
if (f->dst == p && f->is_local) {
/* maybe another port has same hw addr? */
struct net_bridge_port *op;
u16 vid = f->vlan_id;
list_for_each_entry(op, &br->port_list, list) {
if (op != p &&
ether_addr_equal(op->dev->dev_addr,
f->addr.addr) &&
nbp_vlan_find(op, vid)) {
f->dst = op;
goto insert;
}
}
/* delete old one */
fdb_delete(br, f);
insert:
/* insert new address, may fail if invalid
* address or dup.
*/
fdb_insert(br, p, newaddr, vid);
/* if this port has no vlan information
* configured, we can safely be done at
* this point.
*/
if (no_vlan)
goto done;
}
}
}
done:
spin_unlock_bh(&br->hash_lock);
}
void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
{
struct net_bridge_fdb_entry *f;
struct net_port_vlans *pv;
u16 vid = 0;
/* If old entry was unassociated with any port, then delete it. */
f = __br_fdb_get(br, br->dev->dev_addr, 0);
if (f && f->is_local && !f->dst)
fdb_delete(br, f);
fdb_insert(br, NULL, newaddr, 0);
/* Now remove and add entries for every VLAN configured on the
* bridge. This function runs under RTNL so the bitmap will not
* change from under us.
*/
pv = br_get_vlan_info(br);
if (!pv)
return;
for (vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN, vid);
vid < BR_VLAN_BITMAP_LEN;
vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN, vid+1)) {
f = __br_fdb_get(br, br->dev->dev_addr, vid);
if (f && f->is_local && !f->dst)
fdb_delete(br, f);
fdb_insert(br, NULL, newaddr, vid);
}
}
void br_fdb_cleanup(unsigned long _data)
{
struct net_bridge *br = (struct net_bridge *)_data;
unsigned long delay = hold_time(br);
unsigned long next_timer = jiffies + br->ageing_time;
int i;
spin_lock(&br->hash_lock);
for (i = 0; i < BR_HASH_SIZE; i++) {
struct net_bridge_fdb_entry *f;
struct hlist_node *h, *n;
hlist_for_each_entry_safe(f, h, n, &br->hash[i], hlist) {
unsigned long this_timer;
if (f->is_static)
continue;
this_timer = f->updated + delay;
if (time_before_eq(this_timer, jiffies))
fdb_delete(br, f);
else if (time_before(this_timer, next_timer))
next_timer = this_timer;
}
}
spin_unlock(&br->hash_lock);
mod_timer(&br->gc_timer, round_jiffies_up(next_timer));
}
/* Completely flush all dynamic entries in forwarding database.*/
void br_fdb_flush(struct net_bridge *br)
{
int i;
spin_lock_bh(&br->hash_lock);
for (i = 0; i < BR_HASH_SIZE; i++) {
struct net_bridge_fdb_entry *f;
struct hlist_node *h, *n;
hlist_for_each_entry_safe(f, h, n, &br->hash[i], hlist) {
if (!f->is_static)
fdb_delete(br, f);
}
}
spin_unlock_bh(&br->hash_lock);
}
/* Flush all entries referring to a specific port.
* if do_all is set also flush static entries
*/
void br_fdb_delete_by_port(struct net_bridge *br,
const struct net_bridge_port *p,
int do_all)
{
int i;
spin_lock_bh(&br->hash_lock);
for (i = 0; i < BR_HASH_SIZE; i++) {
struct hlist_node *h, *g;
hlist_for_each_safe(h, g, &br->hash[i]) {
struct net_bridge_fdb_entry *f
= hlist_entry(h, struct net_bridge_fdb_entry, hlist);
if (f->dst != p)
continue;
if (f->is_static && !do_all)
continue;
/*
* if multiple ports all have the same device address
* then when one port is deleted, assign
* the local entry to other port
*/
if (f->is_local) {
struct net_bridge_port *op;
list_for_each_entry(op, &br->port_list, list) {
if (op != p &&
ether_addr_equal(op->dev->dev_addr,
f->addr.addr)) {
f->dst = op;
goto skip_delete;
}
}
}
fdb_delete(br, f);
skip_delete: ;
}
}
spin_unlock_bh(&br->hash_lock);
}
/* No locking or refcounting, assumes caller has rcu_read_lock */
struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
const unsigned char *addr,
__u16 vid)
{
struct hlist_node *h;
struct net_bridge_fdb_entry *fdb;
hlist_for_each_entry_rcu(fdb, h,
&br->hash[br_mac_hash(addr, vid)], hlist) {
if (ether_addr_equal(fdb->addr.addr, addr) &&
fdb->vlan_id == vid) {
if (unlikely(has_expired(br, fdb)))
break;
return fdb;
}
}
return NULL;
}
#if IS_ENABLED(CONFIG_ATM_LANE)
/* Interface used by ATM LANE hook to test
* if an addr is on some other bridge port */
int br_fdb_test_addr(struct net_device *dev, unsigned char *addr)
{
struct net_bridge_fdb_entry *fdb;
struct net_bridge_port *port;
int ret;
rcu_read_lock();
port = br_port_get_rcu(dev);
if (!port)
ret = 0;
else {
fdb = __br_fdb_get(port->br, addr, 0);
ret = fdb && fdb->dst && fdb->dst->dev != dev &&
fdb->dst->state == BR_STATE_FORWARDING;
}
rcu_read_unlock();
return ret;
}
#endif /* CONFIG_ATM_LANE */
/*
* Fill buffer with forwarding table records in
* the API format.
*/
int br_fdb_fillbuf(struct net_bridge *br, void *buf,
unsigned long maxnum, unsigned long skip)
{
struct __fdb_entry *fe = buf;
int i, num = 0;
struct hlist_node *h;
struct net_bridge_fdb_entry *f;
memset(buf, 0, maxnum*sizeof(struct __fdb_entry));
rcu_read_lock();
for (i = 0; i < BR_HASH_SIZE; i++) {
hlist_for_each_entry_rcu(f, h, &br->hash[i], hlist) {
if (num >= maxnum)
goto out;
if (has_expired(br, f))
continue;
/* ignore pseudo entry for local MAC address */
if (!f->dst)
continue;
if (skip) {
--skip;
continue;
}
/* convert from internal format to API */
memcpy(fe->mac_addr, f->addr.addr, ETH_ALEN);
/* due to ABI compat need to split into hi/lo */
fe->port_no = f->dst->port_no;
fe->port_hi = f->dst->port_no >> 8;
fe->is_local = f->is_local;
if (!f->is_static)
fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated);
++fe;
++num;
}
}
out:
rcu_read_unlock();
return num;
}
static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head,
const unsigned char *addr,
__u16 vid)
{
struct hlist_node *h;
struct net_bridge_fdb_entry *fdb;
hlist_for_each_entry(fdb, h, head, hlist) {
if (ether_addr_equal(fdb->addr.addr, addr) &&
fdb->vlan_id == vid)
return fdb;
}
return NULL;
}
static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head,
const unsigned char *addr,
__u16 vid)
{
struct hlist_node *h;
struct net_bridge_fdb_entry *fdb;
hlist_for_each_entry_rcu(fdb, h, head, hlist) {
if (ether_addr_equal(fdb->addr.addr, addr) &&
fdb->vlan_id == vid)
return fdb;
}
return NULL;
}
static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
struct net_bridge_port *source,
const unsigned char *addr,
__u16 vid)
{
struct net_bridge_fdb_entry *fdb;
fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC);
if (fdb) {
memcpy(fdb->addr.addr, addr, ETH_ALEN);
fdb->dst = source;
fdb->vlan_id = vid;
fdb->is_local = 0;
fdb->is_static = 0;
fdb->updated = fdb->used = jiffies;
hlist_add_head_rcu(&fdb->hlist, head);
}
return fdb;
}
static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid)
{
struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
struct net_bridge_fdb_entry *fdb;
if (!is_valid_ether_addr(addr))
return -EINVAL;
fdb = fdb_find(head, addr, vid);
if (fdb) {
/* it is okay to have multiple ports with same
* address, just use the first one.
*/
if (fdb->is_local)
return 0;
br_warn(br, "adding interface %s with same address "
"as a received packet\n",
source->dev->name);
fdb_delete(br, fdb);
}
fdb = fdb_create(head, source, addr, vid);
if (!fdb)
return -ENOMEM;
fdb->is_local = fdb->is_static = 1;
fdb_notify(br, fdb, RTM_NEWNEIGH);
return 0;
}
/* Add entry for local address of interface */
int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid)
{
int ret;
spin_lock_bh(&br->hash_lock);
ret = fdb_insert(br, source, addr, vid);
spin_unlock_bh(&br->hash_lock);
return ret;
}
void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid)
{
struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
struct net_bridge_fdb_entry *fdb;
/* some users want to always flood. */
if (hold_time(br) == 0)
return;
/* ignore packets unless we are using this port */
if (!(source->state == BR_STATE_LEARNING ||
source->state == BR_STATE_FORWARDING))
return;
fdb = fdb_find_rcu(head, addr, vid);
if (likely(fdb)) {
/* attempt to update an entry for a local interface */
if (unlikely(fdb->is_local)) {
if (net_ratelimit())
br_warn(br, "received packet on %s with "
"own address as source address\n",
source->dev->name);
} else {
/* fastpath: update of existing entry */
fdb->dst = source;
fdb->updated = jiffies;
}
} else {
spin_lock(&br->hash_lock);
if (likely(!fdb_find(head, addr, vid))) {
fdb = fdb_create(head, source, addr, vid);
if (fdb)
fdb_notify(br, fdb, RTM_NEWNEIGH);
}
/* else we lose race and someone else inserts
* it first, don't bother updating
*/
spin_unlock(&br->hash_lock);
}
}
static int fdb_to_nud(const struct net_bridge_fdb_entry *fdb)
{
if (fdb->is_local)
return NUD_PERMANENT;
else if (fdb->is_static)
return NUD_NOARP;
else if (has_expired(fdb->dst->br, fdb))
return NUD_STALE;
else
return NUD_REACHABLE;
}
static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
const struct net_bridge_fdb_entry *fdb,
u32 portid, u32 seq, int type, unsigned int flags)
{
unsigned long now = jiffies;
struct nda_cacheinfo ci;
struct nlmsghdr *nlh;
struct ndmsg *ndm;
nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
if (nlh == NULL)
return -EMSGSIZE;
ndm = nlmsg_data(nlh);
ndm->ndm_family = AF_BRIDGE;
ndm->ndm_pad1 = 0;
ndm->ndm_pad2 = 0;
ndm->ndm_flags = 0;
ndm->ndm_type = 0;
ndm->ndm_ifindex = fdb->dst ? fdb->dst->dev->ifindex : br->dev->ifindex;
ndm->ndm_state = fdb_to_nud(fdb);
if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr))
goto nla_put_failure;
ci.ndm_used = jiffies_to_clock_t(now - fdb->used);
ci.ndm_confirmed = 0;
ci.ndm_updated = jiffies_to_clock_t(now - fdb->updated);
ci.ndm_refcnt = 0;
if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
goto nla_put_failure;
if (nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id))
goto nla_put_failure;
return nlmsg_end(skb, nlh);
nla_put_failure:
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
}
static inline size_t fdb_nlmsg_size(void)
{
return NLMSG_ALIGN(sizeof(struct ndmsg))
+ nla_total_size(ETH_ALEN) /* NDA_LLADDR */
+ nla_total_size(sizeof(u16)) /* NDA_VLAN */
+ nla_total_size(sizeof(struct nda_cacheinfo));
}
static void fdb_notify(struct net_bridge *br,
const struct net_bridge_fdb_entry *fdb, int type)
{
struct net *net = dev_net(br->dev);
struct sk_buff *skb;
int err = -ENOBUFS;
skb = nlmsg_new(fdb_nlmsg_size(), GFP_ATOMIC);
if (skb == NULL)
goto errout;
err = fdb_fill_info(skb, br, fdb, 0, 0, type, 0);
if (err < 0) {
/* -EMSGSIZE implies BUG in fdb_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
kfree_skb(skb);
goto errout;
}
rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
return;
errout:
if (err < 0)
rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
/* Dump information about entries, in response to GETNEIGH */
net: add generic PF_BRIDGE:RTM_ FDB hooks This adds two new flags NTF_MASTER and NTF_SELF that can now be used to specify where PF_BRIDGE netlink commands should be sent. NTF_MASTER sends the commands to the 'dev->master' device for parsing. Typically this will be the linux net/bridge, or open-vswitch devices. Also without any flags set the command will be handled by the master device as well so that current user space tools continue to work as expected. The NTF_SELF flag will push the PF_BRIDGE commands to the device. In the basic example below the commands are then parsed and programmed in the embedded bridge. Note if both NTF_SELF and NTF_MASTER bits are set then the command will be sent to both 'dev->master' and 'dev' this allows user space to easily keep the embedded bridge and software bridge in sync. There is a slight complication in the case with both flags set when an error occurs. To resolve this the rtnl handler clears the NTF_ flag in the netlink ack to indicate which sets completed successfully. The add/del handlers will abort as soon as any error occurs. To support this new net device ops were added to call into the device and the existing bridging code was refactored to use these. There should be no required changes in user space to support the current bridge behavior. A basic setup with a SR-IOV enabled NIC looks like this, veth0 veth2 | | ------------ | bridge0 | <---- software bridging ------------ / / ethx.y ethx VF PF \ \ <---- propagate FDB entries to HW \ \ -------------------- | Embedded Bridge | <---- hardware offloaded switching -------------------- In this case the embedded bridge must be managed to allow 'veth0' to communicate with 'ethx.y' correctly. At present drivers managing the embedded bridge either send frames onto the network which then get dropped by the switch OR the embedded bridge will flood these frames. With this patch we have a mechanism to manage the embedded bridge correctly from user space. This example is specific to SR-IOV but replacing the VF with another PF or dropping this into the DSA framework generates similar management issues. Examples session using the 'br'[1] tool to add, dump and then delete a mac address with a new "embedded" option and enabled ixgbe driver: # br fdb add 22:35:19:ac:60:59 dev eth3 # br fdb port mac addr flags veth0 22:35:19:ac:60:58 static veth0 9a:5f:81:f7:f6:ec local eth3 00:1b:21:55:23:59 local eth3 22:35:19:ac:60:59 static veth0 22:35:19:ac:60:57 static #br fdb add 22:35:19:ac:60:59 embedded dev eth3 #br fdb port mac addr flags veth0 22:35:19:ac:60:58 static veth0 9a:5f:81:f7:f6:ec local eth3 00:1b:21:55:23:59 local eth3 22:35:19:ac:60:59 static veth0 22:35:19:ac:60:57 static eth3 22:35:19:ac:60:59 local embedded #br fdb del 22:35:19:ac:60:59 embedded dev eth3 I added a couple lines to 'br' to set the flags correctly is all. It is my opinion that the merit of this patch is now embedded and SW bridges can both be modeled correctly in user space using very nearly the same message passing. [1] 'br' tool was published as an RFC here and will be renamed 'bridge' http://patchwork.ozlabs.org/patch/117664/ Thanks to Jamal Hadi Salim, Stephen Hemminger and Ben Hutchings for valuable feedback, suggestions, and review. v2: fixed api descriptions and error case with both NTF_SELF and NTF_MASTER set plus updated patch description. Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-04-15 06:43:56 +00:00
int br_fdb_dump(struct sk_buff *skb,
struct netlink_callback *cb,
struct net_device *dev,
int idx)
{
net: add generic PF_BRIDGE:RTM_ FDB hooks This adds two new flags NTF_MASTER and NTF_SELF that can now be used to specify where PF_BRIDGE netlink commands should be sent. NTF_MASTER sends the commands to the 'dev->master' device for parsing. Typically this will be the linux net/bridge, or open-vswitch devices. Also without any flags set the command will be handled by the master device as well so that current user space tools continue to work as expected. The NTF_SELF flag will push the PF_BRIDGE commands to the device. In the basic example below the commands are then parsed and programmed in the embedded bridge. Note if both NTF_SELF and NTF_MASTER bits are set then the command will be sent to both 'dev->master' and 'dev' this allows user space to easily keep the embedded bridge and software bridge in sync. There is a slight complication in the case with both flags set when an error occurs. To resolve this the rtnl handler clears the NTF_ flag in the netlink ack to indicate which sets completed successfully. The add/del handlers will abort as soon as any error occurs. To support this new net device ops were added to call into the device and the existing bridging code was refactored to use these. There should be no required changes in user space to support the current bridge behavior. A basic setup with a SR-IOV enabled NIC looks like this, veth0 veth2 | | ------------ | bridge0 | <---- software bridging ------------ / / ethx.y ethx VF PF \ \ <---- propagate FDB entries to HW \ \ -------------------- | Embedded Bridge | <---- hardware offloaded switching -------------------- In this case the embedded bridge must be managed to allow 'veth0' to communicate with 'ethx.y' correctly. At present drivers managing the embedded bridge either send frames onto the network which then get dropped by the switch OR the embedded bridge will flood these frames. With this patch we have a mechanism to manage the embedded bridge correctly from user space. This example is specific to SR-IOV but replacing the VF with another PF or dropping this into the DSA framework generates similar management issues. Examples session using the 'br'[1] tool to add, dump and then delete a mac address with a new "embedded" option and enabled ixgbe driver: # br fdb add 22:35:19:ac:60:59 dev eth3 # br fdb port mac addr flags veth0 22:35:19:ac:60:58 static veth0 9a:5f:81:f7:f6:ec local eth3 00:1b:21:55:23:59 local eth3 22:35:19:ac:60:59 static veth0 22:35:19:ac:60:57 static #br fdb add 22:35:19:ac:60:59 embedded dev eth3 #br fdb port mac addr flags veth0 22:35:19:ac:60:58 static veth0 9a:5f:81:f7:f6:ec local eth3 00:1b:21:55:23:59 local eth3 22:35:19:ac:60:59 static veth0 22:35:19:ac:60:57 static eth3 22:35:19:ac:60:59 local embedded #br fdb del 22:35:19:ac:60:59 embedded dev eth3 I added a couple lines to 'br' to set the flags correctly is all. It is my opinion that the merit of this patch is now embedded and SW bridges can both be modeled correctly in user space using very nearly the same message passing. [1] 'br' tool was published as an RFC here and will be renamed 'bridge' http://patchwork.ozlabs.org/patch/117664/ Thanks to Jamal Hadi Salim, Stephen Hemminger and Ben Hutchings for valuable feedback, suggestions, and review. v2: fixed api descriptions and error case with both NTF_SELF and NTF_MASTER set plus updated patch description. Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-04-15 06:43:56 +00:00
struct net_bridge *br = netdev_priv(dev);
int i;
net: add generic PF_BRIDGE:RTM_ FDB hooks This adds two new flags NTF_MASTER and NTF_SELF that can now be used to specify where PF_BRIDGE netlink commands should be sent. NTF_MASTER sends the commands to the 'dev->master' device for parsing. Typically this will be the linux net/bridge, or open-vswitch devices. Also without any flags set the command will be handled by the master device as well so that current user space tools continue to work as expected. The NTF_SELF flag will push the PF_BRIDGE commands to the device. In the basic example below the commands are then parsed and programmed in the embedded bridge. Note if both NTF_SELF and NTF_MASTER bits are set then the command will be sent to both 'dev->master' and 'dev' this allows user space to easily keep the embedded bridge and software bridge in sync. There is a slight complication in the case with both flags set when an error occurs. To resolve this the rtnl handler clears the NTF_ flag in the netlink ack to indicate which sets completed successfully. The add/del handlers will abort as soon as any error occurs. To support this new net device ops were added to call into the device and the existing bridging code was refactored to use these. There should be no required changes in user space to support the current bridge behavior. A basic setup with a SR-IOV enabled NIC looks like this, veth0 veth2 | | ------------ | bridge0 | <---- software bridging ------------ / / ethx.y ethx VF PF \ \ <---- propagate FDB entries to HW \ \ -------------------- | Embedded Bridge | <---- hardware offloaded switching -------------------- In this case the embedded bridge must be managed to allow 'veth0' to communicate with 'ethx.y' correctly. At present drivers managing the embedded bridge either send frames onto the network which then get dropped by the switch OR the embedded bridge will flood these frames. With this patch we have a mechanism to manage the embedded bridge correctly from user space. This example is specific to SR-IOV but replacing the VF with another PF or dropping this into the DSA framework generates similar management issues. Examples session using the 'br'[1] tool to add, dump and then delete a mac address with a new "embedded" option and enabled ixgbe driver: # br fdb add 22:35:19:ac:60:59 dev eth3 # br fdb port mac addr flags veth0 22:35:19:ac:60:58 static veth0 9a:5f:81:f7:f6:ec local eth3 00:1b:21:55:23:59 local eth3 22:35:19:ac:60:59 static veth0 22:35:19:ac:60:57 static #br fdb add 22:35:19:ac:60:59 embedded dev eth3 #br fdb port mac addr flags veth0 22:35:19:ac:60:58 static veth0 9a:5f:81:f7:f6:ec local eth3 00:1b:21:55:23:59 local eth3 22:35:19:ac:60:59 static veth0 22:35:19:ac:60:57 static eth3 22:35:19:ac:60:59 local embedded #br fdb del 22:35:19:ac:60:59 embedded dev eth3 I added a couple lines to 'br' to set the flags correctly is all. It is my opinion that the merit of this patch is now embedded and SW bridges can both be modeled correctly in user space using very nearly the same message passing. [1] 'br' tool was published as an RFC here and will be renamed 'bridge' http://patchwork.ozlabs.org/patch/117664/ Thanks to Jamal Hadi Salim, Stephen Hemminger and Ben Hutchings for valuable feedback, suggestions, and review. v2: fixed api descriptions and error case with both NTF_SELF and NTF_MASTER set plus updated patch description. Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-04-15 06:43:56 +00:00
if (!(dev->priv_flags & IFF_EBRIDGE))
goto out;
net: add generic PF_BRIDGE:RTM_ FDB hooks This adds two new flags NTF_MASTER and NTF_SELF that can now be used to specify where PF_BRIDGE netlink commands should be sent. NTF_MASTER sends the commands to the 'dev->master' device for parsing. Typically this will be the linux net/bridge, or open-vswitch devices. Also without any flags set the command will be handled by the master device as well so that current user space tools continue to work as expected. The NTF_SELF flag will push the PF_BRIDGE commands to the device. In the basic example below the commands are then parsed and programmed in the embedded bridge. Note if both NTF_SELF and NTF_MASTER bits are set then the command will be sent to both 'dev->master' and 'dev' this allows user space to easily keep the embedded bridge and software bridge in sync. There is a slight complication in the case with both flags set when an error occurs. To resolve this the rtnl handler clears the NTF_ flag in the netlink ack to indicate which sets completed successfully. The add/del handlers will abort as soon as any error occurs. To support this new net device ops were added to call into the device and the existing bridging code was refactored to use these. There should be no required changes in user space to support the current bridge behavior. A basic setup with a SR-IOV enabled NIC looks like this, veth0 veth2 | | ------------ | bridge0 | <---- software bridging ------------ / / ethx.y ethx VF PF \ \ <---- propagate FDB entries to HW \ \ -------------------- | Embedded Bridge | <---- hardware offloaded switching -------------------- In this case the embedded bridge must be managed to allow 'veth0' to communicate with 'ethx.y' correctly. At present drivers managing the embedded bridge either send frames onto the network which then get dropped by the switch OR the embedded bridge will flood these frames. With this patch we have a mechanism to manage the embedded bridge correctly from user space. This example is specific to SR-IOV but replacing the VF with another PF or dropping this into the DSA framework generates similar management issues. Examples session using the 'br'[1] tool to add, dump and then delete a mac address with a new "embedded" option and enabled ixgbe driver: # br fdb add 22:35:19:ac:60:59 dev eth3 # br fdb port mac addr flags veth0 22:35:19:ac:60:58 static veth0 9a:5f:81:f7:f6:ec local eth3 00:1b:21:55:23:59 local eth3 22:35:19:ac:60:59 static veth0 22:35:19:ac:60:57 static #br fdb add 22:35:19:ac:60:59 embedded dev eth3 #br fdb port mac addr flags veth0 22:35:19:ac:60:58 static veth0 9a:5f:81:f7:f6:ec local eth3 00:1b:21:55:23:59 local eth3 22:35:19:ac:60:59 static veth0 22:35:19:ac:60:57 static eth3 22:35:19:ac:60:59 local embedded #br fdb del 22:35:19:ac:60:59 embedded dev eth3 I added a couple lines to 'br' to set the flags correctly is all. It is my opinion that the merit of this patch is now embedded and SW bridges can both be modeled correctly in user space using very nearly the same message passing. [1] 'br' tool was published as an RFC here and will be renamed 'bridge' http://patchwork.ozlabs.org/patch/117664/ Thanks to Jamal Hadi Salim, Stephen Hemminger and Ben Hutchings for valuable feedback, suggestions, and review. v2: fixed api descriptions and error case with both NTF_SELF and NTF_MASTER set plus updated patch description. Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-04-15 06:43:56 +00:00
for (i = 0; i < BR_HASH_SIZE; i++) {
struct hlist_node *h;
struct net_bridge_fdb_entry *f;
net: add generic PF_BRIDGE:RTM_ FDB hooks This adds two new flags NTF_MASTER and NTF_SELF that can now be used to specify where PF_BRIDGE netlink commands should be sent. NTF_MASTER sends the commands to the 'dev->master' device for parsing. Typically this will be the linux net/bridge, or open-vswitch devices. Also without any flags set the command will be handled by the master device as well so that current user space tools continue to work as expected. The NTF_SELF flag will push the PF_BRIDGE commands to the device. In the basic example below the commands are then parsed and programmed in the embedded bridge. Note if both NTF_SELF and NTF_MASTER bits are set then the command will be sent to both 'dev->master' and 'dev' this allows user space to easily keep the embedded bridge and software bridge in sync. There is a slight complication in the case with both flags set when an error occurs. To resolve this the rtnl handler clears the NTF_ flag in the netlink ack to indicate which sets completed successfully. The add/del handlers will abort as soon as any error occurs. To support this new net device ops were added to call into the device and the existing bridging code was refactored to use these. There should be no required changes in user space to support the current bridge behavior. A basic setup with a SR-IOV enabled NIC looks like this, veth0 veth2 | | ------------ | bridge0 | <---- software bridging ------------ / / ethx.y ethx VF PF \ \ <---- propagate FDB entries to HW \ \ -------------------- | Embedded Bridge | <---- hardware offloaded switching -------------------- In this case the embedded bridge must be managed to allow 'veth0' to communicate with 'ethx.y' correctly. At present drivers managing the embedded bridge either send frames onto the network which then get dropped by the switch OR the embedded bridge will flood these frames. With this patch we have a mechanism to manage the embedded bridge correctly from user space. This example is specific to SR-IOV but replacing the VF with another PF or dropping this into the DSA framework generates similar management issues. Examples session using the 'br'[1] tool to add, dump and then delete a mac address with a new "embedded" option and enabled ixgbe driver: # br fdb add 22:35:19:ac:60:59 dev eth3 # br fdb port mac addr flags veth0 22:35:19:ac:60:58 static veth0 9a:5f:81:f7:f6:ec local eth3 00:1b:21:55:23:59 local eth3 22:35:19:ac:60:59 static veth0 22:35:19:ac:60:57 static #br fdb add 22:35:19:ac:60:59 embedded dev eth3 #br fdb port mac addr flags veth0 22:35:19:ac:60:58 static veth0 9a:5f:81:f7:f6:ec local eth3 00:1b:21:55:23:59 local eth3 22:35:19:ac:60:59 static veth0 22:35:19:ac:60:57 static eth3 22:35:19:ac:60:59 local embedded #br fdb del 22:35:19:ac:60:59 embedded dev eth3 I added a couple lines to 'br' to set the flags correctly is all. It is my opinion that the merit of this patch is now embedded and SW bridges can both be modeled correctly in user space using very nearly the same message passing. [1] 'br' tool was published as an RFC here and will be renamed 'bridge' http://patchwork.ozlabs.org/patch/117664/ Thanks to Jamal Hadi Salim, Stephen Hemminger and Ben Hutchings for valuable feedback, suggestions, and review. v2: fixed api descriptions and error case with both NTF_SELF and NTF_MASTER set plus updated patch description. Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-04-15 06:43:56 +00:00
hlist_for_each_entry_rcu(f, h, &br->hash[i], hlist) {
if (idx < cb->args[0])
goto skip;
if (fdb_fill_info(skb, br, f,
NETLINK_CB(cb->skb).portid,
net: add generic PF_BRIDGE:RTM_ FDB hooks This adds two new flags NTF_MASTER and NTF_SELF that can now be used to specify where PF_BRIDGE netlink commands should be sent. NTF_MASTER sends the commands to the 'dev->master' device for parsing. Typically this will be the linux net/bridge, or open-vswitch devices. Also without any flags set the command will be handled by the master device as well so that current user space tools continue to work as expected. The NTF_SELF flag will push the PF_BRIDGE commands to the device. In the basic example below the commands are then parsed and programmed in the embedded bridge. Note if both NTF_SELF and NTF_MASTER bits are set then the command will be sent to both 'dev->master' and 'dev' this allows user space to easily keep the embedded bridge and software bridge in sync. There is a slight complication in the case with both flags set when an error occurs. To resolve this the rtnl handler clears the NTF_ flag in the netlink ack to indicate which sets completed successfully. The add/del handlers will abort as soon as any error occurs. To support this new net device ops were added to call into the device and the existing bridging code was refactored to use these. There should be no required changes in user space to support the current bridge behavior. A basic setup with a SR-IOV enabled NIC looks like this, veth0 veth2 | | ------------ | bridge0 | <---- software bridging ------------ / / ethx.y ethx VF PF \ \ <---- propagate FDB entries to HW \ \ -------------------- | Embedded Bridge | <---- hardware offloaded switching -------------------- In this case the embedded bridge must be managed to allow 'veth0' to communicate with 'ethx.y' correctly. At present drivers managing the embedded bridge either send frames onto the network which then get dropped by the switch OR the embedded bridge will flood these frames. With this patch we have a mechanism to manage the embedded bridge correctly from user space. This example is specific to SR-IOV but replacing the VF with another PF or dropping this into the DSA framework generates similar management issues. Examples session using the 'br'[1] tool to add, dump and then delete a mac address with a new "embedded" option and enabled ixgbe driver: # br fdb add 22:35:19:ac:60:59 dev eth3 # br fdb port mac addr flags veth0 22:35:19:ac:60:58 static veth0 9a:5f:81:f7:f6:ec local eth3 00:1b:21:55:23:59 local eth3 22:35:19:ac:60:59 static veth0 22:35:19:ac:60:57 static #br fdb add 22:35:19:ac:60:59 embedded dev eth3 #br fdb port mac addr flags veth0 22:35:19:ac:60:58 static veth0 9a:5f:81:f7:f6:ec local eth3 00:1b:21:55:23:59 local eth3 22:35:19:ac:60:59 static veth0 22:35:19:ac:60:57 static eth3 22:35:19:ac:60:59 local embedded #br fdb del 22:35:19:ac:60:59 embedded dev eth3 I added a couple lines to 'br' to set the flags correctly is all. It is my opinion that the merit of this patch is now embedded and SW bridges can both be modeled correctly in user space using very nearly the same message passing. [1] 'br' tool was published as an RFC here and will be renamed 'bridge' http://patchwork.ozlabs.org/patch/117664/ Thanks to Jamal Hadi Salim, Stephen Hemminger and Ben Hutchings for valuable feedback, suggestions, and review. v2: fixed api descriptions and error case with both NTF_SELF and NTF_MASTER set plus updated patch description. Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-04-15 06:43:56 +00:00
cb->nlh->nlmsg_seq,
RTM_NEWNEIGH,
NLM_F_MULTI) < 0)
break;
skip:
net: add generic PF_BRIDGE:RTM_ FDB hooks This adds two new flags NTF_MASTER and NTF_SELF that can now be used to specify where PF_BRIDGE netlink commands should be sent. NTF_MASTER sends the commands to the 'dev->master' device for parsing. Typically this will be the linux net/bridge, or open-vswitch devices. Also without any flags set the command will be handled by the master device as well so that current user space tools continue to work as expected. The NTF_SELF flag will push the PF_BRIDGE commands to the device. In the basic example below the commands are then parsed and programmed in the embedded bridge. Note if both NTF_SELF and NTF_MASTER bits are set then the command will be sent to both 'dev->master' and 'dev' this allows user space to easily keep the embedded bridge and software bridge in sync. There is a slight complication in the case with both flags set when an error occurs. To resolve this the rtnl handler clears the NTF_ flag in the netlink ack to indicate which sets completed successfully. The add/del handlers will abort as soon as any error occurs. To support this new net device ops were added to call into the device and the existing bridging code was refactored to use these. There should be no required changes in user space to support the current bridge behavior. A basic setup with a SR-IOV enabled NIC looks like this, veth0 veth2 | | ------------ | bridge0 | <---- software bridging ------------ / / ethx.y ethx VF PF \ \ <---- propagate FDB entries to HW \ \ -------------------- | Embedded Bridge | <---- hardware offloaded switching -------------------- In this case the embedded bridge must be managed to allow 'veth0' to communicate with 'ethx.y' correctly. At present drivers managing the embedded bridge either send frames onto the network which then get dropped by the switch OR the embedded bridge will flood these frames. With this patch we have a mechanism to manage the embedded bridge correctly from user space. This example is specific to SR-IOV but replacing the VF with another PF or dropping this into the DSA framework generates similar management issues. Examples session using the 'br'[1] tool to add, dump and then delete a mac address with a new "embedded" option and enabled ixgbe driver: # br fdb add 22:35:19:ac:60:59 dev eth3 # br fdb port mac addr flags veth0 22:35:19:ac:60:58 static veth0 9a:5f:81:f7:f6:ec local eth3 00:1b:21:55:23:59 local eth3 22:35:19:ac:60:59 static veth0 22:35:19:ac:60:57 static #br fdb add 22:35:19:ac:60:59 embedded dev eth3 #br fdb port mac addr flags veth0 22:35:19:ac:60:58 static veth0 9a:5f:81:f7:f6:ec local eth3 00:1b:21:55:23:59 local eth3 22:35:19:ac:60:59 static veth0 22:35:19:ac:60:57 static eth3 22:35:19:ac:60:59 local embedded #br fdb del 22:35:19:ac:60:59 embedded dev eth3 I added a couple lines to 'br' to set the flags correctly is all. It is my opinion that the merit of this patch is now embedded and SW bridges can both be modeled correctly in user space using very nearly the same message passing. [1] 'br' tool was published as an RFC here and will be renamed 'bridge' http://patchwork.ozlabs.org/patch/117664/ Thanks to Jamal Hadi Salim, Stephen Hemminger and Ben Hutchings for valuable feedback, suggestions, and review. v2: fixed api descriptions and error case with both NTF_SELF and NTF_MASTER set plus updated patch description. Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-04-15 06:43:56 +00:00
++idx;
}
}
net: add generic PF_BRIDGE:RTM_ FDB hooks This adds two new flags NTF_MASTER and NTF_SELF that can now be used to specify where PF_BRIDGE netlink commands should be sent. NTF_MASTER sends the commands to the 'dev->master' device for parsing. Typically this will be the linux net/bridge, or open-vswitch devices. Also without any flags set the command will be handled by the master device as well so that current user space tools continue to work as expected. The NTF_SELF flag will push the PF_BRIDGE commands to the device. In the basic example below the commands are then parsed and programmed in the embedded bridge. Note if both NTF_SELF and NTF_MASTER bits are set then the command will be sent to both 'dev->master' and 'dev' this allows user space to easily keep the embedded bridge and software bridge in sync. There is a slight complication in the case with both flags set when an error occurs. To resolve this the rtnl handler clears the NTF_ flag in the netlink ack to indicate which sets completed successfully. The add/del handlers will abort as soon as any error occurs. To support this new net device ops were added to call into the device and the existing bridging code was refactored to use these. There should be no required changes in user space to support the current bridge behavior. A basic setup with a SR-IOV enabled NIC looks like this, veth0 veth2 | | ------------ | bridge0 | <---- software bridging ------------ / / ethx.y ethx VF PF \ \ <---- propagate FDB entries to HW \ \ -------------------- | Embedded Bridge | <---- hardware offloaded switching -------------------- In this case the embedded bridge must be managed to allow 'veth0' to communicate with 'ethx.y' correctly. At present drivers managing the embedded bridge either send frames onto the network which then get dropped by the switch OR the embedded bridge will flood these frames. With this patch we have a mechanism to manage the embedded bridge correctly from user space. This example is specific to SR-IOV but replacing the VF with another PF or dropping this into the DSA framework generates similar management issues. Examples session using the 'br'[1] tool to add, dump and then delete a mac address with a new "embedded" option and enabled ixgbe driver: # br fdb add 22:35:19:ac:60:59 dev eth3 # br fdb port mac addr flags veth0 22:35:19:ac:60:58 static veth0 9a:5f:81:f7:f6:ec local eth3 00:1b:21:55:23:59 local eth3 22:35:19:ac:60:59 static veth0 22:35:19:ac:60:57 static #br fdb add 22:35:19:ac:60:59 embedded dev eth3 #br fdb port mac addr flags veth0 22:35:19:ac:60:58 static veth0 9a:5f:81:f7:f6:ec local eth3 00:1b:21:55:23:59 local eth3 22:35:19:ac:60:59 static veth0 22:35:19:ac:60:57 static eth3 22:35:19:ac:60:59 local embedded #br fdb del 22:35:19:ac:60:59 embedded dev eth3 I added a couple lines to 'br' to set the flags correctly is all. It is my opinion that the merit of this patch is now embedded and SW bridges can both be modeled correctly in user space using very nearly the same message passing. [1] 'br' tool was published as an RFC here and will be renamed 'bridge' http://patchwork.ozlabs.org/patch/117664/ Thanks to Jamal Hadi Salim, Stephen Hemminger and Ben Hutchings for valuable feedback, suggestions, and review. v2: fixed api descriptions and error case with both NTF_SELF and NTF_MASTER set plus updated patch description. Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-04-15 06:43:56 +00:00
out:
return idx;
}
/* Update (create or replace) forwarding database entry */
static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
__u16 state, __u16 flags, __u16 vid)
{
struct net_bridge *br = source->br;
struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
struct net_bridge_fdb_entry *fdb;
fdb = fdb_find(head, addr, vid);
if (fdb == NULL) {
if (!(flags & NLM_F_CREATE))
return -ENOENT;
fdb = fdb_create(head, source, addr, vid);
if (!fdb)
return -ENOMEM;
fdb_notify(br, fdb, RTM_NEWNEIGH);
} else {
if (flags & NLM_F_EXCL)
return -EEXIST;
}
if (fdb_to_nud(fdb) != state) {
if (state & NUD_PERMANENT)
fdb->is_local = fdb->is_static = 1;
else if (state & NUD_NOARP) {
fdb->is_local = 0;
fdb->is_static = 1;
} else
fdb->is_local = fdb->is_static = 0;
fdb->updated = fdb->used = jiffies;
fdb_notify(br, fdb, RTM_NEWNEIGH);
}
return 0;
}
static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p,
const unsigned char *addr, u16 nlh_flags, u16 vid)
{
int err = 0;
if (ndm->ndm_flags & NTF_USE) {
rcu_read_lock();
br_fdb_update(p->br, p, addr, vid);
rcu_read_unlock();
} else {
spin_lock_bh(&p->br->hash_lock);
err = fdb_add_entry(p, addr, ndm->ndm_state,
nlh_flags, vid);
spin_unlock_bh(&p->br->hash_lock);
}
return err;
}
/* Add new permanent fdb entry with RTM_NEWNEIGH */
int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr, u16 nlh_flags)
{
struct net_bridge_port *p;
net: add generic PF_BRIDGE:RTM_ FDB hooks This adds two new flags NTF_MASTER and NTF_SELF that can now be used to specify where PF_BRIDGE netlink commands should be sent. NTF_MASTER sends the commands to the 'dev->master' device for parsing. Typically this will be the linux net/bridge, or open-vswitch devices. Also without any flags set the command will be handled by the master device as well so that current user space tools continue to work as expected. The NTF_SELF flag will push the PF_BRIDGE commands to the device. In the basic example below the commands are then parsed and programmed in the embedded bridge. Note if both NTF_SELF and NTF_MASTER bits are set then the command will be sent to both 'dev->master' and 'dev' this allows user space to easily keep the embedded bridge and software bridge in sync. There is a slight complication in the case with both flags set when an error occurs. To resolve this the rtnl handler clears the NTF_ flag in the netlink ack to indicate which sets completed successfully. The add/del handlers will abort as soon as any error occurs. To support this new net device ops were added to call into the device and the existing bridging code was refactored to use these. There should be no required changes in user space to support the current bridge behavior. A basic setup with a SR-IOV enabled NIC looks like this, veth0 veth2 | | ------------ | bridge0 | <---- software bridging ------------ / / ethx.y ethx VF PF \ \ <---- propagate FDB entries to HW \ \ -------------------- | Embedded Bridge | <---- hardware offloaded switching -------------------- In this case the embedded bridge must be managed to allow 'veth0' to communicate with 'ethx.y' correctly. At present drivers managing the embedded bridge either send frames onto the network which then get dropped by the switch OR the embedded bridge will flood these frames. With this patch we have a mechanism to manage the embedded bridge correctly from user space. This example is specific to SR-IOV but replacing the VF with another PF or dropping this into the DSA framework generates similar management issues. Examples session using the 'br'[1] tool to add, dump and then delete a mac address with a new "embedded" option and enabled ixgbe driver: # br fdb add 22:35:19:ac:60:59 dev eth3 # br fdb port mac addr flags veth0 22:35:19:ac:60:58 static veth0 9a:5f:81:f7:f6:ec local eth3 00:1b:21:55:23:59 local eth3 22:35:19:ac:60:59 static veth0 22:35:19:ac:60:57 static #br fdb add 22:35:19:ac:60:59 embedded dev eth3 #br fdb port mac addr flags veth0 22:35:19:ac:60:58 static veth0 9a:5f:81:f7:f6:ec local eth3 00:1b:21:55:23:59 local eth3 22:35:19:ac:60:59 static veth0 22:35:19:ac:60:57 static eth3 22:35:19:ac:60:59 local embedded #br fdb del 22:35:19:ac:60:59 embedded dev eth3 I added a couple lines to 'br' to set the flags correctly is all. It is my opinion that the merit of this patch is now embedded and SW bridges can both be modeled correctly in user space using very nearly the same message passing. [1] 'br' tool was published as an RFC here and will be renamed 'bridge' http://patchwork.ozlabs.org/patch/117664/ Thanks to Jamal Hadi Salim, Stephen Hemminger and Ben Hutchings for valuable feedback, suggestions, and review. v2: fixed api descriptions and error case with both NTF_SELF and NTF_MASTER set plus updated patch description. Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-04-15 06:43:56 +00:00
int err = 0;
struct net_port_vlans *pv;
unsigned short vid = VLAN_N_VID;
if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) {
pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state);
return -EINVAL;
}
if (tb[NDA_VLAN]) {
if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) {
pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n");
return -EINVAL;
}
vid = nla_get_u16(tb[NDA_VLAN]);
if (vid >= VLAN_N_VID) {
pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n",
vid);
return -EINVAL;
}
}
p = br_port_get_rtnl(dev);
if (p == NULL) {
pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n",
dev->name);
return -EINVAL;
}
pv = nbp_get_vlan_info(p);
if (vid != VLAN_N_VID) {
if (!pv || !test_bit(vid, pv->vlan_bitmap)) {
pr_info("bridge: RTM_NEWNEIGH with unconfigured "
"vlan %d on port %s\n", vid, dev->name);
return -EINVAL;
}
/* VID was specified, so use it. */
err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
} else {
if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) {
err = __br_fdb_add(ndm, p, addr, nlh_flags, 0);
goto out;
}
/* We have vlans configured on this port and user didn't
* specify a VLAN. To be nice, add/update entry for every
* vlan on this port.
*/
vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN);
while (vid < BR_VLAN_BITMAP_LEN) {
err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
if (err)
goto out;
vid = find_next_bit(pv->vlan_bitmap,
BR_VLAN_BITMAP_LEN, vid+1);
}
}
out:
return err;
}
int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr,
u16 vlan)
{
struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan)];
struct net_bridge_fdb_entry *fdb;
fdb = fdb_find(head, addr, vlan);
if (!fdb)
return -ENOENT;
fdb_delete(br, fdb);
return 0;
}
static int __br_fdb_delete(struct net_bridge_port *p,
const unsigned char *addr, u16 vid)
{
int err;
spin_lock_bh(&p->br->hash_lock);
err = fdb_delete_by_addr(p->br, addr, vid);
spin_unlock_bh(&p->br->hash_lock);
return err;
}
/* Remove neighbor entry with RTM_DELNEIGH */
int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr)
{
struct net_bridge_port *p;
int err;
struct net_port_vlans *pv;
unsigned short vid = VLAN_N_VID;
if (tb[NDA_VLAN]) {
if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) {
pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n");
return -EINVAL;
}
vid = nla_get_u16(tb[NDA_VLAN]);
if (vid >= VLAN_N_VID) {
pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n",
vid);
return -EINVAL;
}
}
p = br_port_get_rtnl(dev);
if (p == NULL) {
pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n",
dev->name);
return -EINVAL;
}
pv = nbp_get_vlan_info(p);
if (vid != VLAN_N_VID) {
if (!pv || !test_bit(vid, pv->vlan_bitmap)) {
pr_info("bridge: RTM_DELNEIGH with unconfigured "
"vlan %d on port %s\n", vid, dev->name);
return -EINVAL;
}
err = __br_fdb_delete(p, addr, vid);
} else {
if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) {
err = __br_fdb_delete(p, addr, 0);
goto out;
}
/* We have vlans configured on this port and user didn't
* specify a VLAN. To be nice, add/update entry for every
* vlan on this port.
*/
err = -ENOENT;
vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN);
while (vid < BR_VLAN_BITMAP_LEN) {
err &= __br_fdb_delete(p, addr, vid);
vid = find_next_bit(pv->vlan_bitmap,
BR_VLAN_BITMAP_LEN, vid+1);
}
}
out:
return err;
}