Merge branch 'l3mdev_saddr_op'
David Ahern says: ==================== net: Add saddr op to l3mdev and vrf First 2 patches are re-sends of patches that got lost in the ethosphere Tuesday; they were part of the first round of l3mdev conversions. Next 3 handle the source address lookup for raw and datagram sockets bound to a VRF device. The conversion to the get_saddr op also fixes locally originated TCP packets showing up at the VRF device. The use of the FLOWI_FLAG_L3MDEV_SRC flag in ip_route_connect_init was causing locally generated packets to skip the VRF device. v2 - rebased to top of net-next per device delete fix and hash based multipath patches ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
390a4bee5c
@ -36,11 +36,12 @@
|
||||
#include <net/addrconf.h>
|
||||
#include <net/l3mdev.h>
|
||||
|
||||
#define RT_FL_TOS(oldflp4) \
|
||||
((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
|
||||
|
||||
#define DRV_NAME "vrf"
|
||||
#define DRV_VERSION "1.0"
|
||||
|
||||
#define vrf_is_slave(dev) ((dev)->flags & IFF_SLAVE)
|
||||
|
||||
#define vrf_master_get_rcu(dev) \
|
||||
((struct net_device *)rcu_dereference(dev->rx_handler_data))
|
||||
|
||||
@ -208,7 +209,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
|
||||
.flowi4_oif = vrf_dev->ifindex,
|
||||
.flowi4_iif = LOOPBACK_IFINDEX,
|
||||
.flowi4_tos = RT_TOS(ip4h->tos),
|
||||
.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_VRFSRC |
|
||||
.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_L3MDEV_SRC |
|
||||
FLOWI_FLAG_SKIP_NH_OIF,
|
||||
.daddr = ip4h->daddr,
|
||||
};
|
||||
@ -433,7 +434,7 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
|
||||
if (ret < 0)
|
||||
goto out_unregister;
|
||||
|
||||
port_dev->flags |= IFF_SLAVE;
|
||||
port_dev->priv_flags |= IFF_L3MDEV_SLAVE;
|
||||
__vrf_insert_slave(queue, slave);
|
||||
cycle_netdev(port_dev);
|
||||
|
||||
@ -448,7 +449,7 @@ out_fail:
|
||||
|
||||
static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
|
||||
{
|
||||
if (netif_is_l3_master(port_dev) || vrf_is_slave(port_dev))
|
||||
if (netif_is_l3_master(port_dev) || netif_is_l3_slave(port_dev))
|
||||
return -EINVAL;
|
||||
|
||||
return do_vrf_add_slave(dev, port_dev);
|
||||
@ -462,7 +463,7 @@ static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev)
|
||||
struct slave *slave;
|
||||
|
||||
netdev_upper_dev_unlink(port_dev, dev);
|
||||
port_dev->flags &= ~IFF_SLAVE;
|
||||
port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE;
|
||||
|
||||
netdev_rx_handler_unregister(port_dev);
|
||||
|
||||
@ -545,7 +546,7 @@ static struct rtable *vrf_get_rtable(const struct net_device *dev,
|
||||
{
|
||||
struct rtable *rth = NULL;
|
||||
|
||||
if (!(fl4->flowi4_flags & FLOWI_FLAG_VRFSRC)) {
|
||||
if (!(fl4->flowi4_flags & FLOWI_FLAG_L3MDEV_SRC)) {
|
||||
struct net_vrf *vrf = netdev_priv(dev);
|
||||
|
||||
rth = vrf->rth;
|
||||
@ -555,9 +556,41 @@ static struct rtable *vrf_get_rtable(const struct net_device *dev,
|
||||
return rth;
|
||||
}
|
||||
|
||||
/* called under rcu_read_lock */
|
||||
/*
 * vrf_get_saddr - l3mdev_get_saddr callback of the vrf driver
 * @dev: device whose network namespace is used for the FIB lookup
 * @fl4: flow to resolve; fl4->saddr may be written
 *
 * Performs a FIB lookup for @fl4 with temporarily adjusted flags, tos and
 * scope, and uses the result to set a source address. flowi4_flags,
 * flowi4_tos and flowi4_scope are restored before returning; flowi4_iif
 * is overwritten with LOOPBACK_IFINDEX and is not restored.
 * Does nothing when fl4->daddr is zero or when the lookup fails.
 */
static void vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
|
||||
{
|
||||
struct fib_result res = { .tclassid = 0 };
|
||||
struct net *net = dev_net(dev);
|
||||
/* Saved copies of the fields that are modified and restored below. */
u32 orig_tos = fl4->flowi4_tos;
|
||||
u8 flags = fl4->flowi4_flags;
|
||||
u8 scope = fl4->flowi4_scope;
|
||||
/* tos masked by RT_FL_TOS: keeps the IPTOS_RT_MASK bits and RTO_ONLINK. */
u8 tos = RT_FL_TOS(fl4);
|
||||
|
||||
/* Without a destination there is nothing to look up. */
if (unlikely(!fl4->daddr))
|
||||
return;
|
||||
|
||||
/* Adjust the flow key for the lookup only. */
fl4->flowi4_flags |= FLOWI_FLAG_SKIP_NH_OIF;
|
||||
fl4->flowi4_iif = LOOPBACK_IFINDEX;
|
||||
fl4->flowi4_tos = tos & IPTOS_RT_MASK;
|
||||
/* RTO_ONLINK in tos selects link scope, otherwise universe scope. */
fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
|
||||
RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
|
||||
|
||||
/* A zero return from fib_lookup is treated as success. */
if (!fib_lookup(net, fl4, &res, 0)) {
|
||||
/* Local route: use the route's preferred source, else the destination. */
if (res.type == RTN_LOCAL)
|
||||
fl4->saddr = res.fi->fib_prefsrc ? : fl4->daddr;
|
||||
else
|
||||
/* fib_select_path fills fl4->saddr when it is still unset. */
fib_select_path(net, &res, fl4, -1);
|
||||
}
|
||||
|
||||
/* Undo the temporary changes (flowi4_iif is left as set above). */
fl4->flowi4_flags = flags;
|
||||
fl4->flowi4_tos = orig_tos;
|
||||
fl4->flowi4_scope = scope;
|
||||
}
|
||||
|
||||
/* l3mdev callbacks implemented by the vrf driver. */
static const struct l3mdev_ops vrf_l3mdev_ops = {
|
||||
.l3mdev_fib_table = vrf_fib_table,
|
||||
.l3mdev_get_rtable = vrf_get_rtable,
|
||||
/* Source address lookup for a flow, see vrf_get_saddr(). */
.l3mdev_get_saddr = vrf_get_saddr,
|
||||
};
|
||||
|
||||
static void vrf_get_drvinfo(struct net_device *dev,
|
||||
@ -672,7 +705,7 @@ static int vrf_device_event(struct notifier_block *unused,
|
||||
if (event == NETDEV_UNREGISTER) {
|
||||
struct net_device *vrf_dev;
|
||||
|
||||
if (!vrf_is_slave(dev) || netif_is_l3_master(dev))
|
||||
if (!netif_is_l3_slave(dev))
|
||||
goto out;
|
||||
|
||||
vrf_dev = netdev_master_upper_dev_get(dev);
|
||||
|
@ -1261,6 +1261,7 @@ struct net_device_ops {
|
||||
* @IFF_L3MDEV_MASTER: device is an L3 master device
|
||||
* @IFF_NO_QUEUE: device can run without qdisc attached
|
||||
* @IFF_OPENVSWITCH: device is a Open vSwitch master
|
||||
* @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device
|
||||
*/
|
||||
enum netdev_priv_flags {
|
||||
IFF_802_1Q_VLAN = 1<<0,
|
||||
@ -1286,6 +1287,7 @@ enum netdev_priv_flags {
|
||||
IFF_L3MDEV_MASTER = 1<<20,
|
||||
IFF_NO_QUEUE = 1<<21,
|
||||
IFF_OPENVSWITCH = 1<<22,
|
||||
IFF_L3MDEV_SLAVE = 1<<23,
|
||||
};
|
||||
|
||||
#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN
|
||||
@ -3830,6 +3832,11 @@ static inline bool netif_is_l3_master(const struct net_device *dev)
|
||||
return dev->priv_flags & IFF_L3MDEV_MASTER;
|
||||
}
|
||||
|
||||
/*
 * netif_is_l3_slave - test the IFF_L3MDEV_SLAVE private flag
 * @dev: device to test
 *
 * Returns true when IFF_L3MDEV_SLAVE is set in dev->priv_flags.
 */
static inline bool netif_is_l3_slave(const struct net_device *dev)
|
||||
{
|
||||
return dev->priv_flags & IFF_L3MDEV_SLAVE;
|
||||
}
|
||||
|
||||
static inline bool netif_is_bridge_master(const struct net_device *dev)
|
||||
{
|
||||
return dev->priv_flags & IFF_EBRIDGE;
|
||||
|
@ -34,7 +34,7 @@ struct flowi_common {
|
||||
__u8 flowic_flags;
|
||||
#define FLOWI_FLAG_ANYSRC 0x01
|
||||
#define FLOWI_FLAG_KNOWN_NH 0x02
|
||||
#define FLOWI_FLAG_VRFSRC 0x04
|
||||
#define FLOWI_FLAG_L3MDEV_SRC 0x04
|
||||
#define FLOWI_FLAG_SKIP_NH_OIF 0x08
|
||||
__u32 flowic_secid;
|
||||
struct flowi_tunnel flowic_tun_key;
|
||||
|
@ -329,6 +329,8 @@ static inline int fib_multipath_hash(__be32 saddr, __be32 daddr)
|
||||
}
|
||||
|
||||
void fib_select_multipath(struct fib_result *res, int hash);
|
||||
void fib_select_path(struct net *net, struct fib_result *res,
|
||||
struct flowi4 *fl4, int mp_hash);
|
||||
|
||||
/* Exported by fib_trie.c */
|
||||
void fib_trie_init(void);
|
||||
|
@ -17,12 +17,16 @@
|
||||
* @l3mdev_fib_table: Get FIB table id to use for lookups
|
||||
*
|
||||
* @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device
|
||||
*
|
||||
* @l3mdev_get_saddr: Get source address for a flow
|
||||
*/
|
||||
|
||||
struct l3mdev_ops {
|
||||
u32 (*l3mdev_fib_table)(const struct net_device *dev);
|
||||
struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev,
|
||||
const struct flowi4 *fl4);
|
||||
void (*l3mdev_get_saddr)(struct net_device *dev,
|
||||
struct flowi4 *fl4);
|
||||
};
|
||||
|
||||
#ifdef CONFIG_NET_L3_MASTER_DEV
|
||||
@ -100,6 +104,25 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
 * l3mdev_get_saddr - let an L3 master device resolve a flow's source address
 * @net: network namespace used to resolve @ifindex
 * @ifindex: device index; 0 makes this a no-op
 * @fl4: flow handed to the device's l3mdev_get_saddr op
 *
 * Looks up the device under rcu_read_lock() and, if it is an L3 master
 * that provides the l3mdev_get_saddr op, calls that op with @fl4.
 * Does nothing if the device is not found, is not an L3 master, or has
 * no such op.
 */
static inline void l3mdev_get_saddr(struct net *net, int ifindex,
|
||||
struct flowi4 *fl4)
|
||||
{
|
||||
struct net_device *dev;
|
||||
|
||||
if (ifindex) {
|
||||
|
||||
/* The RCU read section covers both the lookup and the op call. */
rcu_read_lock();
|
||||
|
||||
dev = dev_get_by_index_rcu(net, ifindex);
|
||||
/*
 * NOTE(review): dev->l3mdev_ops is dereferenced without a NULL check;
 * presumably every L3 master device sets l3mdev_ops - confirm.
 */
if (dev && netif_is_l3_master(dev) &&
|
||||
dev->l3mdev_ops->l3mdev_get_saddr) {
|
||||
dev->l3mdev_ops->l3mdev_get_saddr(dev, fl4);
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static inline int l3mdev_master_ifindex_rcu(struct net_device *dev)
|
||||
@ -144,6 +167,10 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * Empty stub of l3mdev_get_saddr() for the #else configuration branch
 * (presumably CONFIG_NET_L3_MASTER_DEV disabled - confirm against the
 * full header). Leaves @fl4 untouched.
 */
static inline void l3mdev_get_saddr(struct net *net, int ifindex,
|
||||
struct flowi4 *fl4)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _NET_L3MDEV_H_ */
|
||||
|
@ -266,9 +266,6 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32
|
||||
if (inet_sk(sk)->transparent)
|
||||
flow_flags |= FLOWI_FLAG_ANYSRC;
|
||||
|
||||
if (netif_index_is_l3_master(sock_net(sk), oif))
|
||||
flow_flags |= FLOWI_FLAG_VRFSRC | FLOWI_FLAG_SKIP_NH_OIF;
|
||||
|
||||
flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE,
|
||||
protocol, flow_flags, dst, src, dport, sport);
|
||||
}
|
||||
@ -285,6 +282,10 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4,
|
||||
ip_route_connect_init(fl4, dst, src, tos, oif, protocol,
|
||||
sport, dport, sk);
|
||||
|
||||
if (!src && oif) {
|
||||
l3mdev_get_saddr(net, oif, fl4);
|
||||
src = fl4->saddr;
|
||||
}
|
||||
if (!dst || !src) {
|
||||
rt = __ip_route_output_key(net, fl4);
|
||||
if (IS_ERR(rt))
|
||||
|
@ -1557,3 +1557,24 @@ void fib_select_multipath(struct fib_result *res, int hash)
|
||||
res->nh_sel = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * fib_select_path - pick a path for a FIB result and fill in the source
 * @net: network namespace, passed to FIB_RES_PREFSRC
 * @res: FIB lookup result; may be updated by the selection helpers
 * @fl4: flow being routed; fl4->saddr is set if it was zero
 * @mp_hash: multipath hash; a negative value means "compute it here"
 *
 * With CONFIG_IP_ROUTE_MULTIPATH, a result with more than one nexthop and
 * no output interface in the flow goes through fib_select_multipath().
 * Otherwise, a zero-prefixlen unicast result from a table with more than
 * one default route, again with no output interface, goes through
 * fib_select_default(). Finally an unset fl4->saddr is filled from
 * FIB_RES_PREFSRC.
 */
void fib_select_path(struct net *net, struct fib_result *res,
|
||||
struct flowi4 *fl4, int mp_hash)
|
||||
{
|
||||
#ifdef CONFIG_IP_ROUTE_MULTIPATH
|
||||
if (res->fi->fib_nhs > 1 && fl4->flowi4_oif == 0) {
|
||||
/* No hash supplied by the caller: derive one from the addresses. */
if (mp_hash < 0)
|
||||
mp_hash = fib_multipath_hash(fl4->saddr, fl4->daddr);
|
||||
fib_select_multipath(res, mp_hash);
|
||||
}
|
||||
/*
 * This else pairs with the if below; without CONFIG_IP_ROUTE_MULTIPATH
 * the if below runs unconditionally.
 */
else
|
||||
#endif
|
||||
if (!res->prefixlen &&
|
||||
res->table->tb_num_default > 1 &&
|
||||
res->type == RTN_UNICAST && !fl4->flowi4_oif)
|
||||
fib_select_default(fl4, res);
|
||||
|
||||
/* Keep a source address that is already set in the flow. */
if (!fl4->saddr)
|
||||
fl4->saddr = FIB_RES_PREFSRC(net, *res);
|
||||
}
|
||||
/* Exported GPL-only so other kernel code can call fib_select_path(). */
EXPORT_SYMBOL_GPL(fib_select_path);
|
||||
|
@ -484,6 +484,7 @@ static int raw_getfrag(void *from, char *to, int offset, int len, int odd,
|
||||
static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
|
||||
{
|
||||
struct inet_sock *inet = inet_sk(sk);
|
||||
struct net *net = sock_net(sk);
|
||||
struct ipcm_cookie ipc;
|
||||
struct rtable *rt = NULL;
|
||||
struct flowi4 fl4;
|
||||
@ -543,7 +544,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
|
||||
ipc.oif = sk->sk_bound_dev_if;
|
||||
|
||||
if (msg->msg_controllen) {
|
||||
err = ip_cmsg_send(sock_net(sk), msg, &ipc, false);
|
||||
err = ip_cmsg_send(net, msg, &ipc, false);
|
||||
if (err)
|
||||
goto out;
|
||||
if (ipc.opt)
|
||||
@ -598,6 +599,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
|
||||
(inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
|
||||
daddr, saddr, 0, 0);
|
||||
|
||||
if (!saddr && ipc.oif)
|
||||
l3mdev_get_saddr(net, ipc.oif, &fl4);
|
||||
|
||||
if (!inet->hdrincl) {
|
||||
rfv.msg = msg;
|
||||
rfv.hlen = 0;
|
||||
@ -608,7 +612,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
|
||||
}
|
||||
|
||||
security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
|
||||
rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
|
||||
rt = ip_route_output_flow(net, &fl4, sk);
|
||||
if (IS_ERR(rt)) {
|
||||
err = PTR_ERR(rt);
|
||||
rt = NULL;
|
||||
|
@ -2238,21 +2238,7 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
|
||||
goto make_route;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_IP_ROUTE_MULTIPATH
|
||||
if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0) {
|
||||
if (mp_hash < 0)
|
||||
mp_hash = fib_multipath_hash(fl4->saddr, fl4->daddr);
|
||||
fib_select_multipath(&res, mp_hash);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
if (!res.prefixlen &&
|
||||
res.table->tb_num_default > 1 &&
|
||||
res.type == RTN_UNICAST && !fl4->flowi4_oif)
|
||||
fib_select_default(fl4, &res);
|
||||
|
||||
if (!fl4->saddr)
|
||||
fl4->saddr = FIB_RES_PREFSRC(net, res);
|
||||
fib_select_path(net, &res, fl4, mp_hash);
|
||||
|
||||
dev_out = FIB_RES_DEV(res);
|
||||
fl4->flowi4_oif = dev_out->ifindex;
|
||||
|
@ -1017,30 +1017,14 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
|
||||
|
||||
fl4 = &fl4_stack;
|
||||
|
||||
/* unconnected socket. If output device is enslaved to a VRF
|
||||
* device lookup source address from VRF table. This mimics
|
||||
* behavior of ip_route_connect{_init}.
|
||||
*/
|
||||
if (netif_index_is_l3_master(net, ipc.oif)) {
|
||||
flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
|
||||
RT_SCOPE_UNIVERSE, sk->sk_protocol,
|
||||
(flow_flags | FLOWI_FLAG_VRFSRC |
|
||||
FLOWI_FLAG_SKIP_NH_OIF),
|
||||
faddr, saddr, dport,
|
||||
inet->inet_sport);
|
||||
|
||||
rt = ip_route_output_flow(net, fl4, sk);
|
||||
if (!IS_ERR(rt)) {
|
||||
saddr = fl4->saddr;
|
||||
ip_rt_put(rt);
|
||||
}
|
||||
}
|
||||
|
||||
flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
|
||||
RT_SCOPE_UNIVERSE, sk->sk_protocol,
|
||||
flow_flags,
|
||||
faddr, saddr, dport, inet->inet_sport);
|
||||
|
||||
if (!saddr && ipc.oif)
|
||||
l3mdev_get_saddr(net, ipc.oif, fl4);
|
||||
|
||||
security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
|
||||
rt = ip_route_output_flow(net, fl4, sk);
|
||||
if (IS_ERR(rt)) {
|
||||
|
@ -26,11 +26,11 @@ int l3mdev_master_ifindex_rcu(struct net_device *dev)
|
||||
|
||||
if (netif_is_l3_master(dev)) {
|
||||
ifindex = dev->ifindex;
|
||||
} else if (dev->flags & IFF_SLAVE) {
|
||||
} else if (netif_is_l3_slave(dev)) {
|
||||
struct net_device *master;
|
||||
|
||||
master = netdev_master_upper_dev_get_rcu(dev);
|
||||
if (master && netif_is_l3_master(master))
|
||||
if (master)
|
||||
ifindex = master->ifindex;
|
||||
}
|
||||
|
||||
@ -54,7 +54,7 @@ u32 l3mdev_fib_table_rcu(const struct net_device *dev)
|
||||
if (netif_is_l3_master(dev)) {
|
||||
if (dev->l3mdev_ops->l3mdev_fib_table)
|
||||
tb_id = dev->l3mdev_ops->l3mdev_fib_table(dev);
|
||||
} else if (dev->flags & IFF_SLAVE) {
|
||||
} else if (netif_is_l3_slave(dev)) {
|
||||
/* Users of netdev_master_upper_dev_get_rcu need non-const,
|
||||
* but current inet_*type functions take a const
|
||||
*/
|
||||
@ -62,7 +62,7 @@ u32 l3mdev_fib_table_rcu(const struct net_device *dev)
|
||||
const struct net_device *master;
|
||||
|
||||
master = netdev_master_upper_dev_get_rcu(_dev);
|
||||
if (master && netif_is_l3_master(master) &&
|
||||
if (master &&
|
||||
master->l3mdev_ops->l3mdev_fib_table)
|
||||
tb_id = master->l3mdev_ops->l3mdev_fib_table(master);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user