forked from Minki/linux
net: better IFF_XMIT_DST_RELEASE support
While testing xmit_more support with netperf and connected UDP sockets, I found strange dst refcount false sharing. The current handling of IFF_XMIT_DST_RELEASE is not optimal. Dropping the dst in validate_xmit_skb() is certainly too late when the packet was queued by cpu X but dequeued by cpu Y. The logical point to take care of drop/force is in __dev_queue_xmit(), before even taking the qdisc lock. As Julian Anastasov pointed out, the need for skb_dst() might come from some packet schedulers or classifiers. This patch adds a new helper to cleanly express the needs of various drivers or qdiscs/classifiers. Drivers that need skb_dst() in their ndo_start_xmit() should call the following helper in their setup instead of the prior idiom: dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; -> netif_keep_dst(dev); Instead of using a single bit, we use two bits, one being eventually rebuilt in bonding/team drivers. The other one is permanent and blocks IFF_XMIT_DST_RELEASE from being rebuilt in bonding/team. Eventually, we could add something smarter. Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Julian Anastasov <ja@ssi.bg> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
fe971b95c2
commit
0287587884
@ -1364,7 +1364,7 @@ void ipoib_setup(struct net_device *dev)
|
||||
dev->tx_queue_len = ipoib_sendq_size * 2;
|
||||
dev->features = (NETIF_F_VLAN_CHALLENGED |
|
||||
NETIF_F_HIGHDMA);
|
||||
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
|
||||
netif_keep_dst(dev);
|
||||
|
||||
memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
|
||||
|
||||
|
@ -74,7 +74,7 @@ static struct net_device * __init ipddp_init(void)
|
||||
if (!dev)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
|
||||
netif_keep_dst(dev);
|
||||
strcpy(dev->name, "ipddp%d");
|
||||
|
||||
if (version_printed++ == 0)
|
||||
|
@ -1002,7 +1002,8 @@ static netdev_features_t bond_fix_features(struct net_device *dev,
|
||||
|
||||
static void bond_compute_features(struct bonding *bond)
|
||||
{
|
||||
unsigned int flags, dst_release_flag = IFF_XMIT_DST_RELEASE;
|
||||
unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE |
|
||||
IFF_XMIT_DST_RELEASE_PERM;
|
||||
netdev_features_t vlan_features = BOND_VLAN_FEATURES;
|
||||
netdev_features_t enc_features = BOND_ENC_FEATURES;
|
||||
struct net_device *bond_dev = bond->dev;
|
||||
@ -1038,8 +1039,10 @@ done:
|
||||
bond_dev->gso_max_segs = gso_max_segs;
|
||||
netif_set_gso_max_size(bond_dev, gso_max_size);
|
||||
|
||||
flags = bond_dev->priv_flags & ~IFF_XMIT_DST_RELEASE;
|
||||
bond_dev->priv_flags = flags | dst_release_flag;
|
||||
bond_dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
|
||||
if ((bond_dev->priv_flags & IFF_XMIT_DST_RELEASE_PERM) &&
|
||||
dst_release_flag == (IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM))
|
||||
bond_dev->priv_flags |= IFF_XMIT_DST_RELEASE;
|
||||
|
||||
netdev_change_features(bond_dev);
|
||||
}
|
||||
|
@ -199,7 +199,7 @@ static void __init eql_setup(struct net_device *dev)
|
||||
|
||||
dev->type = ARPHRD_SLIP;
|
||||
dev->tx_queue_len = 5; /* Hands them off fast */
|
||||
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
|
||||
netif_keep_dst(dev);
|
||||
}
|
||||
|
||||
static int eql_open(struct net_device *dev)
|
||||
|
@ -185,7 +185,8 @@ static void ifb_setup(struct net_device *dev)
|
||||
|
||||
dev->flags |= IFF_NOARP;
|
||||
dev->flags &= ~IFF_MULTICAST;
|
||||
dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
|
||||
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
|
||||
netif_keep_dst(dev);
|
||||
eth_hw_addr_random(dev);
|
||||
}
|
||||
|
||||
|
@ -169,7 +169,7 @@ static void loopback_setup(struct net_device *dev)
|
||||
dev->type = ARPHRD_LOOPBACK; /* 0x0001*/
|
||||
dev->flags = IFF_LOOPBACK;
|
||||
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
|
||||
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
|
||||
netif_keep_dst(dev);
|
||||
dev->hw_features = NETIF_F_ALL_TSO | NETIF_F_UFO;
|
||||
dev->features = NETIF_F_SG | NETIF_F_FRAGLIST
|
||||
| NETIF_F_ALL_TSO
|
||||
|
@ -1025,7 +1025,8 @@ void macvlan_common_setup(struct net_device *dev)
|
||||
{
|
||||
ether_setup(dev);
|
||||
|
||||
dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
|
||||
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
|
||||
netif_keep_dst(dev);
|
||||
dev->priv_flags |= IFF_UNICAST_FLT;
|
||||
dev->netdev_ops = &macvlan_netdev_ops;
|
||||
dev->destructor = free_netdev;
|
||||
|
@ -1103,7 +1103,7 @@ static void ppp_setup(struct net_device *dev)
|
||||
dev->type = ARPHRD_PPP;
|
||||
dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
|
||||
dev->features |= NETIF_F_NETNS_LOCAL;
|
||||
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
|
||||
netif_keep_dst(dev);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -970,7 +970,8 @@ static void __team_compute_features(struct team *team)
|
||||
struct team_port *port;
|
||||
u32 vlan_features = TEAM_VLAN_FEATURES & NETIF_F_ALL_FOR_ALL;
|
||||
unsigned short max_hard_header_len = ETH_HLEN;
|
||||
unsigned int flags, dst_release_flag = IFF_XMIT_DST_RELEASE;
|
||||
unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE |
|
||||
IFF_XMIT_DST_RELEASE_PERM;
|
||||
|
||||
list_for_each_entry(port, &team->port_list, list) {
|
||||
vlan_features = netdev_increment_features(vlan_features,
|
||||
@ -985,8 +986,9 @@ static void __team_compute_features(struct team *team)
|
||||
team->dev->vlan_features = vlan_features;
|
||||
team->dev->hard_header_len = max_hard_header_len;
|
||||
|
||||
flags = team->dev->priv_flags & ~IFF_XMIT_DST_RELEASE;
|
||||
team->dev->priv_flags = flags | dst_release_flag;
|
||||
team->dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
|
||||
if (dst_release_flag == (IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM))
|
||||
team->dev->priv_flags |= IFF_XMIT_DST_RELEASE;
|
||||
|
||||
netdev_change_features(team->dev);
|
||||
}
|
||||
|
@ -2193,7 +2193,7 @@ static void vxlan_setup(struct net_device *dev)
|
||||
dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
|
||||
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
|
||||
dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
|
||||
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
|
||||
netif_keep_dst(dev);
|
||||
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
|
||||
|
||||
INIT_LIST_HEAD(&vxlan->next);
|
||||
|
@ -1047,7 +1047,7 @@ static void pvc_setup(struct net_device *dev)
|
||||
dev->flags = IFF_POINTOPOINT;
|
||||
dev->hard_header_len = 10;
|
||||
dev->addr_len = 2;
|
||||
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
|
||||
netif_keep_dst(dev);
|
||||
}
|
||||
|
||||
static const struct net_device_ops pvc_ops = {
|
||||
|
@ -3306,7 +3306,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
|
||||
card->dev->features |= NETIF_F_HW_VLAN_CTAG_TX |
|
||||
NETIF_F_HW_VLAN_CTAG_RX |
|
||||
NETIF_F_HW_VLAN_CTAG_FILTER;
|
||||
card->dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
|
||||
netif_keep_dst(card->dev);
|
||||
card->dev->gso_max_size = 15 * PAGE_SIZE;
|
||||
|
||||
SET_NETDEV_DEV(card->dev, &card->gdev->dev);
|
||||
|
@ -1206,6 +1206,7 @@ enum netdev_priv_flags {
|
||||
IFF_SUPP_NOFCS = 1<<19,
|
||||
IFF_LIVE_ADDR_CHANGE = 1<<20,
|
||||
IFF_MACVLAN = 1<<21,
|
||||
IFF_XMIT_DST_RELEASE_PERM = 1<<22,
|
||||
};
|
||||
|
||||
#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN
|
||||
@ -1230,6 +1231,7 @@ enum netdev_priv_flags {
|
||||
#define IFF_SUPP_NOFCS IFF_SUPP_NOFCS
|
||||
#define IFF_LIVE_ADDR_CHANGE IFF_LIVE_ADDR_CHANGE
|
||||
#define IFF_MACVLAN IFF_MACVLAN
|
||||
#define IFF_XMIT_DST_RELEASE_PERM IFF_XMIT_DST_RELEASE_PERM
|
||||
|
||||
/**
|
||||
* struct net_device - The DEVICE structure.
|
||||
@ -3588,6 +3590,12 @@ static inline bool netif_supports_nofcs(struct net_device *dev)
|
||||
return dev->priv_flags & IFF_SUPP_NOFCS;
|
||||
}
|
||||
|
||||
/* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit().
 *
 * Clears both IFF_XMIT_DST_RELEASE and IFF_XMIT_DST_RELEASE_PERM in
 * dev->priv_flags. Clearing only IFF_XMIT_DST_RELEASE would not be enough
 * for devices whose flags are recomputed later: the bonding feature
 * recomputation turns IFF_XMIT_DST_RELEASE back on only while
 * IFF_XMIT_DST_RELEASE_PERM is still set, so dropping the PERM bit here
 * makes the "keep dst" request stick.
 *
 * Call this instead of open-coding
 *	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
 */
|
||||
static inline void netif_keep_dst(struct net_device *dev)
|
||||
{
|
||||
dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM);
|
||||
}
|
||||
|
||||
extern struct pernet_operations __net_initdata loopback_net_ops;
|
||||
|
||||
/* Logging, debugging and troubleshooting/diagnostic helpers. */
|
||||
|
@ -799,7 +799,8 @@ void vlan_setup(struct net_device *dev)
|
||||
ether_setup(dev);
|
||||
|
||||
dev->priv_flags |= IFF_802_1Q_VLAN;
|
||||
dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
|
||||
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
|
||||
netif_keep_dst(dev);
|
||||
dev->tx_queue_len = 0;
|
||||
|
||||
dev->netdev_ops = &vlan_netdev_ops;
|
||||
|
@ -501,7 +501,7 @@ static void clip_setup(struct net_device *dev)
|
||||
/* without any more elaborate queuing. 100 is a reasonable */
|
||||
/* compromise between decent burst-tolerance and protection */
|
||||
/* against memory hogs. */
|
||||
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
|
||||
netif_keep_dst(dev);
|
||||
}
|
||||
|
||||
static int clip_create(int number)
|
||||
|
@ -2665,12 +2665,6 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
|
||||
if (skb->next)
|
||||
return skb;
|
||||
|
||||
/* If device doesn't need skb->dst, release it right now while
|
||||
* its hot in this cpu cache
|
||||
*/
|
||||
if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
|
||||
skb_dst_drop(skb);
|
||||
|
||||
features = netif_skb_features(skb);
|
||||
skb = validate_xmit_vlan(skb, features);
|
||||
if (unlikely(!skb))
|
||||
@ -2811,8 +2805,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
|
||||
* waiting to be sent out; and the qdisc is not running -
|
||||
* xmit the skb directly.
|
||||
*/
|
||||
if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
|
||||
skb_dst_force(skb);
|
||||
|
||||
qdisc_bstats_update(q, skb);
|
||||
|
||||
@ -2827,7 +2819,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
|
||||
|
||||
rc = NET_XMIT_SUCCESS;
|
||||
} else {
|
||||
skb_dst_force(skb);
|
||||
rc = q->enqueue(skb, q) & NET_XMIT_MASK;
|
||||
if (qdisc_run_begin(q)) {
|
||||
if (unlikely(contended)) {
|
||||
@ -2924,6 +2915,14 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
|
||||
|
||||
skb_update_prio(skb);
|
||||
|
||||
/* If device/qdisc don't need skb->dst, release it right now while
|
||||
* its hot in this cpu cache.
|
||||
*/
|
||||
if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
|
||||
skb_dst_drop(skb);
|
||||
else
|
||||
skb_dst_force(skb);
|
||||
|
||||
txq = netdev_pick_tx(dev, skb, accel_priv);
|
||||
q = rcu_dereference_bh(txq->qdisc);
|
||||
|
||||
@ -6674,7 +6673,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
|
||||
INIT_LIST_HEAD(&dev->adj_list.lower);
|
||||
INIT_LIST_HEAD(&dev->all_adj_list.upper);
|
||||
INIT_LIST_HEAD(&dev->all_adj_list.lower);
|
||||
dev->priv_flags = IFF_XMIT_DST_RELEASE;
|
||||
dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
|
||||
setup(dev);
|
||||
|
||||
dev->num_tx_queues = txqs;
|
||||
|
@ -510,7 +510,7 @@ static int ipgre_tunnel_init(struct net_device *dev)
|
||||
memcpy(dev->broadcast, &iph->daddr, 4);
|
||||
|
||||
dev->flags = IFF_NOARP;
|
||||
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
|
||||
netif_keep_dst(dev);
|
||||
dev->addr_len = 4;
|
||||
|
||||
if (iph->daddr) {
|
||||
|
@ -364,7 +364,7 @@ static int vti_tunnel_init(struct net_device *dev)
|
||||
dev->iflink = 0;
|
||||
dev->addr_len = 4;
|
||||
dev->features |= NETIF_F_LLTX;
|
||||
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
|
||||
netif_keep_dst(dev);
|
||||
|
||||
return ip_tunnel_init(dev);
|
||||
}
|
||||
|
@ -289,7 +289,7 @@ static void ipip_tunnel_setup(struct net_device *dev)
|
||||
dev->iflink = 0;
|
||||
dev->addr_len = 4;
|
||||
dev->features |= NETIF_F_LLTX;
|
||||
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
|
||||
netif_keep_dst(dev);
|
||||
|
||||
dev->features |= IPIP_FEATURES;
|
||||
dev->hw_features |= IPIP_FEATURES;
|
||||
|
@ -1242,7 +1242,7 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
|
||||
dev->flags |= IFF_NOARP;
|
||||
dev->iflink = 0;
|
||||
dev->addr_len = sizeof(struct in6_addr);
|
||||
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
|
||||
netif_keep_dst(dev);
|
||||
}
|
||||
|
||||
static int ip6gre_tunnel_init(struct net_device *dev)
|
||||
|
@ -1493,7 +1493,7 @@ static void ip6_tnl_dev_setup(struct net_device *dev)
|
||||
dev->mtu -= 8;
|
||||
dev->flags |= IFF_NOARP;
|
||||
dev->addr_len = sizeof(struct in6_addr);
|
||||
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
|
||||
netif_keep_dst(dev);
|
||||
/* This perm addr will be used as interface identifier by IPv6 */
|
||||
dev->addr_assign_type = NET_ADDR_RANDOM;
|
||||
eth_random_addr(dev->perm_addr);
|
||||
|
@ -807,7 +807,7 @@ static void vti6_dev_setup(struct net_device *dev)
|
||||
dev->mtu = ETH_DATA_LEN;
|
||||
dev->flags |= IFF_NOARP;
|
||||
dev->addr_len = sizeof(struct in6_addr);
|
||||
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
|
||||
netif_keep_dst(dev);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1364,7 +1364,7 @@ static void ipip6_tunnel_setup(struct net_device *dev)
|
||||
dev->hard_header_len = LL_MAX_HEADER + t_hlen;
|
||||
dev->mtu = ETH_DATA_LEN - t_hlen;
|
||||
dev->flags = IFF_NOARP;
|
||||
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
|
||||
netif_keep_dst(dev);
|
||||
dev->iflink = 0;
|
||||
dev->addr_len = 4;
|
||||
dev->features |= NETIF_F_LLTX;
|
||||
|
@ -493,6 +493,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
|
||||
tcf_exts_change(tp, &fnew->exts, &e);
|
||||
tcf_em_tree_change(tp, &fnew->ematches, &t);
|
||||
|
||||
netif_keep_dst(qdisc_dev(tp->q));
|
||||
|
||||
if (tb[TCA_FLOW_KEYS]) {
|
||||
fnew->keymask = keymask;
|
||||
fnew->nkeys = nkeys;
|
||||
|
@ -524,6 +524,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
|
||||
if (f->handle < f1->handle)
|
||||
break;
|
||||
|
||||
netif_keep_dst(qdisc_dev(tp->q));
|
||||
rcu_assign_pointer(f->next, f1);
|
||||
rcu_assign_pointer(*fp, f);
|
||||
|
||||
|
@ -47,7 +47,6 @@ EXPORT_SYMBOL(default_qdisc_ops);
|
||||
|
||||
static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
|
||||
{
|
||||
skb_dst_force(skb);
|
||||
q->gso_skb = skb;
|
||||
q->qstats.requeues++;
|
||||
q->q.qlen++; /* it's still part of the queue */
|
||||
@ -218,8 +217,6 @@ static inline int qdisc_restart(struct Qdisc *q)
|
||||
if (unlikely(!skb))
|
||||
return 0;
|
||||
|
||||
WARN_ON_ONCE(skb_dst_is_noref(skb));
|
||||
|
||||
root_lock = qdisc_lock(q);
|
||||
dev = qdisc_dev(q);
|
||||
txq = skb_get_tx_queue(dev, skb);
|
||||
|
@ -470,7 +470,7 @@ static __init void teql_master_setup(struct net_device *dev)
|
||||
dev->tx_queue_len = 100;
|
||||
dev->flags = IFF_NOARP;
|
||||
dev->hard_header_len = LL_MAX_HEADER;
|
||||
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
|
||||
netif_keep_dst(dev);
|
||||
}
|
||||
|
||||
static LIST_HEAD(master_dev_list);
|
||||
|
Loading…
Reference in New Issue
Block a user