xdp: Move devmap bulk queue into struct net_device
Commit 96360004b8 ("xdp: Make devmap flush_list common for all map
instances") changed devmap flushing to be a global operation instead of a
per-map operation. However, the queue structure used for bulking was still
allocated as part of the containing map.
This patch moves the devmap bulk queue into struct net_device. The
motivation for this is reusing it for the non-map variant of XDP_REDIRECT,
which will be changed in a subsequent commit. To avoid other fields of
struct net_device moving to different cache lines, we also move a couple of
other members around.
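
As an illustration of what the bulking scheme does, here is a minimal
userspace C model of a fixed-size per-device queue that is flushed once it
fills up. The names mirror the patch, but the program itself is a
hypothetical sketch, not kernel code:

#include <stdio.h>

#define DEV_MAP_BULK_SIZE 16		/* same bulk size as the patch */

struct xdp_frame { int id; };		/* stand-in for the real xdp_frame */

/* Modeled on struct xdp_dev_bulk_queue: one queue per (device, CPU) pair. */
struct bulk_queue {
	struct xdp_frame *q[DEV_MAP_BULK_SIZE];
	unsigned int count;
};

/* Modeled on bq_xmit_all(): hand the whole batch to the driver at once. */
static void bq_flush(struct bulk_queue *bq)
{
	printf("flushing %u frames\n", bq->count);
	bq->count = 0;
}

/* Modeled on bq_enqueue(): flush first when the queue is already full. */
static void bq_enqueue(struct bulk_queue *bq, struct xdp_frame *xdpf)
{
	if (bq->count == DEV_MAP_BULK_SIZE)
		bq_flush(bq);
	bq->q[bq->count++] = xdpf;
}

int main(void)
{
	struct bulk_queue bq = { .count = 0 };
	struct xdp_frame frames[40];
	int i;

	for (i = 0; i < 40; i++) {
		frames[i].id = i;
		bq_enqueue(&bq, &frames[i]);
	}
	bq_flush(&bq);			/* final flush of the partial batch */
	return 0;
}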
We defer the actual allocation of the bulk queue structure until the
NETDEV_REGISTER notification in devmap.c. This makes it possible to check
for ndo_xdp_xmit support before allocating the structure, which is not
possible at the time struct net_device is allocated. However, we keep the
freeing in free_netdev() to avoid adding another RCU callback on
NETDEV_UNREGISTER.
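
The resulting allocation policy can be sketched as: allocate the per-CPU
queue only for devices that can actually transmit XDP frames, and free it
unconditionally at teardown. A hedged userspace model of that pattern
follows; the has_ndo_xdp_xmit flag stands in for the real
netdev_ops->ndo_xdp_xmit check, and NR_CPUS is an assumed constant:

#include <stdlib.h>

#define NR_CPUS 4			/* assumed CPU count for the model */

struct bulk_queue { unsigned int count; };

struct fake_netdev {
	int has_ndo_xdp_xmit;		/* models netdev_ops->ndo_xdp_xmit */
	struct bulk_queue *xdp_bulkq;	/* models the per-CPU queue */
};

/* Models the NETDEV_REGISTER branch: skip devices without XDP TX support,
 * and don't allocate twice.
 */
static int on_register(struct fake_netdev *dev)
{
	if (!dev->has_ndo_xdp_xmit || dev->xdp_bulkq)
		return 0;
	dev->xdp_bulkq = calloc(NR_CPUS, sizeof(*dev->xdp_bulkq));
	return dev->xdp_bulkq ? 0 : -1;
}

/* Models free_netdev(): freeing is unconditional and safe for devices that
 * never got a queue, so no extra RCU callback is needed on
 * NETDEV_UNREGISTER (free(NULL) is a no-op, like free_percpu(NULL)).
 */
static void on_free(struct fake_netdev *dev)
{
	free(dev->xdp_bulkq);
	dev->xdp_bulkq = NULL;
}

int main(void)
{
	struct fake_netdev dev = { .has_ndo_xdp_xmit = 1 };

	if (on_register(&dev))
		return 1;
	on_free(&dev);
	return 0;
}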
Because of this change, we lose the reference back to the map that
originated the redirect, so change the tracepoint to always report 0 as the
map ID and index. Otherwise, no functional change is intended with this
patch.
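
Concretely, the tracepoint now guards the optional map pointer instead of
dereferencing it unconditionally. A simplified model of that assignment
(not the actual TRACE_EVENT macro):

#include <stdio.h>

struct bpf_map { unsigned int id; };

/* Models the TP_fast_assign change: report map ID 0 when there is no map. */
static unsigned int tp_map_id(const struct bpf_map *map)
{
	return map ? map->id : 0;
}

int main(void)
{
	struct bpf_map map = { .id = 42 };

	printf("%u %u\n", tp_map_id(&map), tp_map_id(NULL));
	return 0;
}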
After this patch, the relevant part of struct net_device looks like this,
according to pahole:
/* --- cacheline 14 boundary (896 bytes) --- */
struct netdev_queue * _tx __attribute__((__aligned__(64))); /* 896 8 */
unsigned int num_tx_queues; /* 904 4 */
unsigned int real_num_tx_queues; /* 908 4 */
struct Qdisc * qdisc; /* 912 8 */
unsigned int tx_queue_len; /* 920 4 */
spinlock_t tx_global_lock; /* 924 4 */
struct xdp_dev_bulk_queue * xdp_bulkq; /* 928 8 */
struct xps_dev_maps * xps_cpus_map; /* 936 8 */
struct xps_dev_maps * xps_rxqs_map; /* 944 8 */
struct mini_Qdisc * miniq_egress; /* 952 8 */
/* --- cacheline 15 boundary (960 bytes) --- */
struct hlist_head qdisc_hash[16]; /* 960 128 */
/* --- cacheline 17 boundary (1088 bytes) --- */
struct timer_list watchdog_timer; /* 1088 40 */
/* XXX last struct has 4 bytes of padding */
int watchdog_timeo; /* 1128 4 */
/* XXX 4 bytes hole, try to pack */
struct list_head todo_list; /* 1136 16 */
/* --- cacheline 18 boundary (1152 bytes) --- */
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/157918768397.1458396.12673224324627072349.stgit@toke.dk
parent 20f21d98cf
commit 75ccae62cb
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -876,6 +876,7 @@ enum bpf_netdev_command {
 struct bpf_prog_offload_ops;
 struct netlink_ext_ack;
 struct xdp_umem;
+struct xdp_dev_bulk_queue;
 
 struct netdev_bpf {
 	enum bpf_netdev_command command;
@@ -1986,12 +1987,10 @@ struct net_device {
 	unsigned int		num_tx_queues;
 	unsigned int		real_num_tx_queues;
 	struct Qdisc		*qdisc;
-#ifdef CONFIG_NET_SCHED
-	DECLARE_HASHTABLE	(qdisc_hash, 4);
-#endif
 	unsigned int		tx_queue_len;
 	spinlock_t		tx_global_lock;
-	int			watchdog_timeo;
+
+	struct xdp_dev_bulk_queue __percpu *xdp_bulkq;
 
 #ifdef CONFIG_XPS
 	struct xps_dev_maps __rcu *xps_cpus_map;
@@ -2001,11 +2000,15 @@ struct net_device {
 	struct mini_Qdisc __rcu	*miniq_egress;
 #endif
 
+#ifdef CONFIG_NET_SCHED
+	DECLARE_HASHTABLE	(qdisc_hash, 4);
+#endif
 	/* These may be needed for future network-power-down code. */
 	struct timer_list	watchdog_timer;
+	int			watchdog_timeo;
 
-	int __percpu		*pcpu_refcnt;
 	struct list_head	todo_list;
+	int __percpu		*pcpu_refcnt;
 
 	struct list_head	link_watch_list;
 
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -278,7 +278,7 @@ TRACE_EVENT(xdp_devmap_xmit,
 	),
 
 	TP_fast_assign(
-		__entry->map_id		= map->id;
+		__entry->map_id		= map ? map->id : 0;
 		__entry->act		= XDP_REDIRECT;
 		__entry->map_index	= map_index;
 		__entry->drops		= drops;
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -53,13 +53,11 @@
 	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
 
 #define DEV_MAP_BULK_SIZE 16
-struct bpf_dtab_netdev;
-
-struct xdp_bulk_queue {
+struct xdp_dev_bulk_queue {
 	struct xdp_frame *q[DEV_MAP_BULK_SIZE];
 	struct list_head flush_node;
+	struct net_device *dev;
 	struct net_device *dev_rx;
-	struct bpf_dtab_netdev *obj;
 	unsigned int count;
 };
 
@@ -67,9 +65,8 @@ struct bpf_dtab_netdev {
 	struct net_device *dev; /* must be first member, due to tracepoint */
 	struct hlist_node index_hlist;
 	struct bpf_dtab *dtab;
-	struct xdp_bulk_queue __percpu *bulkq;
 	struct rcu_head rcu;
-	unsigned int idx; /* keep track of map index for tracepoint */
+	unsigned int idx;
 };
 
 struct bpf_dtab {
@@ -219,7 +216,6 @@ static void dev_map_free(struct bpf_map *map)
 
 		hlist_for_each_entry_safe(dev, next, head, index_hlist) {
 			hlist_del_rcu(&dev->index_hlist);
-			free_percpu(dev->bulkq);
 			dev_put(dev->dev);
 			kfree(dev);
 		}
@@ -234,7 +230,6 @@ static void dev_map_free(struct bpf_map *map)
 			if (!dev)
 				continue;
 
-			free_percpu(dev->bulkq);
 			dev_put(dev->dev);
 			kfree(dev);
 		}
@@ -320,10 +315,9 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
 	return -ENOENT;
 }
 
-static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags)
+static int bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
 {
-	struct bpf_dtab_netdev *obj = bq->obj;
-	struct net_device *dev = obj->dev;
+	struct net_device *dev = bq->dev;
 	int sent = 0, drops = 0, err = 0;
 	int i;
 
@@ -346,8 +340,7 @@ static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags)
 out:
 	bq->count = 0;
 
-	trace_xdp_devmap_xmit(&obj->dtab->map, obj->idx,
-			      sent, drops, bq->dev_rx, dev, err);
+	trace_xdp_devmap_xmit(NULL, 0, sent, drops, bq->dev_rx, dev, err);
 	bq->dev_rx = NULL;
 	__list_del_clearprev(&bq->flush_node);
 	return 0;
@@ -374,7 +367,7 @@ error:
 void __dev_map_flush(void)
 {
 	struct list_head *flush_list = this_cpu_ptr(&dev_map_flush_list);
-	struct xdp_bulk_queue *bq, *tmp;
+	struct xdp_dev_bulk_queue *bq, *tmp;
 
 	rcu_read_lock();
 	list_for_each_entry_safe(bq, tmp, flush_list, flush_node)
@@ -401,12 +394,12 @@ struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 /* Runs under RCU-read-side, plus in softirq under NAPI protection.
  * Thus, safe percpu variable access.
  */
-static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
+static int bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
 		      struct net_device *dev_rx)
-
 {
 	struct list_head *flush_list = this_cpu_ptr(&dev_map_flush_list);
-	struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
+	struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);
 
 	if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
 		bq_xmit_all(bq, 0);
@@ -444,7 +437,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 	if (unlikely(!xdpf))
 		return -EOVERFLOW;
 
-	return bq_enqueue(dst, xdpf, dev_rx);
+	return bq_enqueue(dev, xdpf, dev_rx);
 }
 
 int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
@@ -483,7 +476,6 @@ static void __dev_map_entry_free(struct rcu_head *rcu)
 	struct bpf_dtab_netdev *dev;
 
 	dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
-	free_percpu(dev->bulkq);
 	dev_put(dev->dev);
 	kfree(dev);
 }
@@ -538,30 +530,15 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
 						    u32 ifindex,
 						    unsigned int idx)
 {
-	gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
 	struct bpf_dtab_netdev *dev;
-	struct xdp_bulk_queue *bq;
-	int cpu;
 
-	dev = kmalloc_node(sizeof(*dev), gfp, dtab->map.numa_node);
+	dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
+			   dtab->map.numa_node);
 	if (!dev)
 		return ERR_PTR(-ENOMEM);
 
-	dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
-					sizeof(void *), gfp);
-	if (!dev->bulkq) {
-		kfree(dev);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	for_each_possible_cpu(cpu) {
-		bq = per_cpu_ptr(dev->bulkq, cpu);
-		bq->obj = dev;
-	}
-
 	dev->dev = dev_get_by_index(net, ifindex);
 	if (!dev->dev) {
-		free_percpu(dev->bulkq);
 		kfree(dev);
 		return ERR_PTR(-EINVAL);
 	}
@@ -721,9 +698,23 @@ static int dev_map_notification(struct notifier_block *notifier,
 {
 	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
 	struct bpf_dtab *dtab;
-	int i;
+	int i, cpu;
 
 	switch (event) {
+	case NETDEV_REGISTER:
+		if (!netdev->netdev_ops->ndo_xdp_xmit || netdev->xdp_bulkq)
+			break;
+
+		/* will be freed in free_netdev() */
+		netdev->xdp_bulkq =
+			__alloc_percpu_gfp(sizeof(struct xdp_dev_bulk_queue),
+					   sizeof(void *), GFP_ATOMIC);
+		if (!netdev->xdp_bulkq)
+			return NOTIFY_BAD;
+
+		for_each_possible_cpu(cpu)
+			per_cpu_ptr(netdev->xdp_bulkq, cpu)->dev = netdev;
+		break;
 	case NETDEV_UNREGISTER:
 		/* This rcu_read_lock/unlock pair is needed because
 		 * dev_map_list is an RCU list AND to ensure a delete
diff --git a/net/core/dev.c b/net/core/dev.c
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -9847,6 +9847,8 @@ void free_netdev(struct net_device *dev)
 
 	free_percpu(dev->pcpu_refcnt);
 	dev->pcpu_refcnt = NULL;
+	free_percpu(dev->xdp_bulkq);
+	dev->xdp_bulkq = NULL;
 
 	netdev_unregister_lockdep_key(dev);
 