// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Bridge per vlan tunnel port dst_metadata handling code
 *
 *	Authors:
 *	Roopa Prabhu		<roopa@cumulusnetworks.com>
 */

#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>
#include <net/switchdev.h>
#include <net/dst_metadata.h>

#include "br_private.h"
#include "br_private_tunnel.h"
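
/* rhashtable comparison callback: must return zero on a match and
 * nonzero otherwise, hence the != below.
 */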
static inline int br_vlan_tunid_cmp(struct rhashtable_compare_arg *arg,
				    const void *ptr)
{
	const struct net_bridge_vlan *vle = ptr;
	__be64 tunid = *(__be64 *)arg->key;

	return vle->tinfo.tunnel_id != tunid;
}

static const struct rhashtable_params br_vlan_tunnel_rht_params = {
	.head_offset = offsetof(struct net_bridge_vlan, tnode),
	.key_offset = offsetof(struct net_bridge_vlan, tinfo.tunnel_id),
	.key_len = sizeof(__be64),
	.nelem_hint = 3,
	.obj_cmpfn = br_vlan_tunid_cmp,
	.automatic_shrinking = true,
};
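
/* Look up the vlan mapped to a tunnel id. rhashtable_lookup_fast()
 * takes the RCU read lock internally, so this is usable from both the
 * RTNL-protected configuration path and the forwarding path.
 */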
static struct net_bridge_vlan *br_vlan_tunnel_lookup(struct rhashtable *tbl,
						     __be64 tunnel_id)
{
	return rhashtable_lookup_fast(tbl, &tunnel_id,
				      br_vlan_tunnel_rht_params);
}
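
/* Tear down the per-vlan tunnel state. Caller must hold RTNL (see
 * rtnl_dereference below); tunnel_id is cleared with WRITE_ONCE since
 * br_handle_egress_vlan_tunnel() reads it locklessly with READ_ONCE.
 */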
static void vlan_tunnel_info_release(struct net_bridge_vlan *vlan)
{
	struct metadata_dst *tdst = rtnl_dereference(vlan->tinfo.tunnel_dst);

	WRITE_ONCE(vlan->tinfo.tunnel_id, 0);
	RCU_INIT_POINTER(vlan->tinfo.tunnel_dst, NULL);
	dst_release(&tdst->dst);
}
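
/* Unmap the vlan from its tunnel id and release the cached metadata
 * dst. A no-op when the vlan has no tunnel configured.
 */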
void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg,
			  struct net_bridge_vlan *vlan)
{
	if (!rcu_access_pointer(vlan->tinfo.tunnel_dst))
		return;
	rhashtable_remove_fast(&vg->tunnel_hash, &vlan->tnode,
			       br_vlan_tunnel_rht_params);
	vlan_tunnel_info_release(vlan);
}
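
/* Map a vlan to a tunnel id: preallocate TX dst metadata keyed by the
 * tunnel id and insert the vlan into the tunnel hash. Tunnel flags are
 * bitmaps (IP_TUNNEL_DECLARE_FLAGS), so individual flags are set with
 * __set_bit(). Returns -EEXIST if the vlan already has a tunnel id.
 */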
static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg,
				  struct net_bridge_vlan *vlan, u32 tun_id)
{
	struct metadata_dst *metadata = rtnl_dereference(vlan->tinfo.tunnel_dst);
	__be64 key = key32_to_tunnel_id(cpu_to_be32(tun_id));
	IP_TUNNEL_DECLARE_FLAGS(flags) = { };
	int err;

	if (metadata)
		return -EEXIST;

	__set_bit(IP_TUNNEL_KEY_BIT, flags);
	metadata = __ip_tun_set_dst(0, 0, 0, 0, 0, flags, key, 0);
	if (!metadata)
		return -EINVAL;

	metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_BRIDGE;
	rcu_assign_pointer(vlan->tinfo.tunnel_dst, metadata);
	WRITE_ONCE(vlan->tinfo.tunnel_id, key);

	err = rhashtable_lookup_insert_fast(&vg->tunnel_hash, &vlan->tnode,
					    br_vlan_tunnel_rht_params);
	if (err)
		goto out;

	return 0;
out:
	vlan_tunnel_info_release(vlan);

	return err;
}
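
/* Per-vlan tunnel mappings are configured from user space; with
 * iproute2 this would look roughly like (illustrative only, device
 * names are examples):
 *
 *   ip link set dev vxlan0 type bridge_slave vlan_tunnel on
 *   bridge vlan add dev vxlan0 vid 100 tunnel_info id 1000
 */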

/* Must be protected by RTNL.
 * Must be called with vid in range from 1 to 4094 inclusive.
 */
int nbp_vlan_tunnel_info_add(const struct net_bridge_port *port, u16 vid,
			     u32 tun_id)
{
	struct net_bridge_vlan_group *vg;
	struct net_bridge_vlan *vlan;

	ASSERT_RTNL();

	vg = nbp_vlan_group(port);
	vlan = br_vlan_find(vg, vid);
	if (!vlan)
		return -EINVAL;

	return __vlan_tunnel_info_add(vg, vlan, tun_id);
}

/* Must be protected by RTNL.
 * Must be called with vid in range from 1 to 4094 inclusive.
 */
int nbp_vlan_tunnel_info_delete(const struct net_bridge_port *port, u16 vid)
{
	struct net_bridge_vlan_group *vg;
	struct net_bridge_vlan *v;

	ASSERT_RTNL();

	vg = nbp_vlan_group(port);
	v = br_vlan_find(vg, vid);
	if (!v)
		return -ENOENT;

	vlan_tunnel_info_del(vg, v);

	return 0;
}
static void __vlan_tunnel_info_flush(struct net_bridge_vlan_group *vg)
{
	struct net_bridge_vlan *vlan, *tmp;

	list_for_each_entry_safe(vlan, tmp, &vg->vlan_list, vlist)
		vlan_tunnel_info_del(vg, vlan);
}

void nbp_vlan_tunnel_info_flush(struct net_bridge_port *port)
{
	struct net_bridge_vlan_group *vg;

	ASSERT_RTNL();

	vg = nbp_vlan_group(port);
	__vlan_tunnel_info_flush(vg);
}
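
/* Lifetime of the per-vlan-group tunnel hash table. */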
int vlan_tunnel_init(struct net_bridge_vlan_group *vg)
{
	return rhashtable_init(&vg->tunnel_hash, &br_vlan_tunnel_rht_params);
}

void vlan_tunnel_deinit(struct net_bridge_vlan_group *vg)
{
	rhashtable_destroy(&vg->tunnel_hash);
}
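
/* Ingress: translate the tunnel id carried in the skb's dst metadata
 * into the vlan mapped to it and tag the skb with that vid.
 */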
void br_handle_ingress_vlan_tunnel(struct sk_buff *skb,
				   struct net_bridge_port *p,
				   struct net_bridge_vlan_group *vg)
{
	struct ip_tunnel_info *tinfo = skb_tunnel_info(skb);
	struct net_bridge_vlan *vlan;

	if (!vg || !tinfo)
		return;

	/* if already tagged, ignore */
	if (skb_vlan_tagged(skb))
		return;

	/* lookup vid, given tunnel id */
	vlan = br_vlan_tunnel_lookup(&vg->tunnel_hash, tinfo->key.tun_id);
	if (!vlan)
		return;

	skb_dst_drop(skb);

	__vlan_hwaccel_put_tag(skb, p->br->vlan_proto, vlan->vid);
}
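
/* Egress: pop the vlan tag and attach the dst metadata that carries
 * the tunnel id mapped to the vlan, so that the tunnel device below
 * the bridge can build the encapsulation from it.
 */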
int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
				 struct net_bridge_vlan *vlan)
{
	IP_TUNNEL_DECLARE_FLAGS(flags) = { };
	struct metadata_dst *tunnel_dst;
	__be64 tunnel_id;
	int err;

	if (!vlan)
		return 0;

	tunnel_id = READ_ONCE(vlan->tinfo.tunnel_id);
	if (!tunnel_id || unlikely(!skb_vlan_tag_present(skb)))
		return 0;

	skb_dst_drop(skb);
	err = skb_vlan_pop(skb);
	if (err)
		return err;
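
	/* An skb redirected to a backup port carries a nexthop object ID
	 * in its control block; attach freshly allocated metadata with
	 * both the tunnel key and that ID instead of the preallocated
	 * dst, so a tunnel device such as vxlan can pick the egress VTEP
	 * from the nexthop group.
	 */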
	if (BR_INPUT_SKB_CB(skb)->backup_nhid) {
		__set_bit(IP_TUNNEL_KEY_BIT, flags);
		tunnel_dst = __ip_tun_set_dst(0, 0, 0, 0, 0, flags,
					      tunnel_id, 0);
		if (!tunnel_dst)
			return -ENOMEM;

		tunnel_dst->u.tun_info.mode |= IP_TUNNEL_INFO_TX |
					       IP_TUNNEL_INFO_BRIDGE;
		tunnel_dst->u.tun_info.key.nhid =
			BR_INPUT_SKB_CB(skb)->backup_nhid;
		skb_dst_set(skb, &tunnel_dst->dst);

		return 0;
	}

	tunnel_dst = rcu_dereference(vlan->tinfo.tunnel_dst);
	if (tunnel_dst && dst_hold_safe(&tunnel_dst->dst))
		skb_dst_set(skb, &tunnel_dst->dst);

	return 0;
}