Merge branch 'bpf-xsk-fix-mixed-mode'
Magnus Karlsson says:

====================
Previously, the xsk code did not record which umem was bound to a specific queue id. This was not required as long as every driver was zero-copy enabled, since the association had to be recorded in the driver anyway: if a user tried to bind two umems to the same queue, the driver would say no. But if copy mode was enabled first and then zero-copy mode (or the reverse order), we mistakenly enabled both of them on the same queue id, leading to buggy behavior. The main culprit is that we did not store the umem-to-queue-id association in the copy case and relied solely on the driver reporting it. Since this relation is not stored in the driver for copy mode (it does not use the AF_XDP NDOs), that obviously could not work.

This series fixes the problem by always recording the umem-to-queue-id relationship in the netdev_queue and netdev_rx_queue structs. This way we always know what kind of umem has been bound to a queue id and can act appropriately at bind time. To make the bind semantics consistent with ethtool queue manipulations, and to facilitate driver implementations, we also forbid decreasing the number of queues/channels with ethtool if there is an active AF_XDP socket in the set of queues that would be disabled.

Jakub, please take a look at your patches. The last one I had to change slightly to make it fit the new xdp_get_umem_from_qid() interface. An added bonus of this function is that, in the future, we can also use it from the driver to get a umem, simplifying driver implementations (and later removing the umem from the NDO completely). Björn will mail patches at a later point in time that use this in the i40e and ixgbe drivers and remove a good chunk of code from the ZC implementations. I also made your code aware of Tx queues: if we create a socket that only has a Tx queue, the queue id refers to a Tx queue id only and can be larger than the number of available Rx queues. Please take a look at it.

Differences against v1:

* Included patches from Jakub that forbid decreasing the number of active queues if a queue to be deactivated has an AF_XDP socket. These have been adapted somewhat to the new interfaces in patch 2.
* Removed the redundant check against real_num_[rt]x_queues in xsk_bind.
* Only need to test against real_num_[rt]x_queues in xdp_clear_umem_at_qid.

Patch 1: Introduce a umem reference in the netdev_rx_queue and netdev_queue structs.
Patch 2: Record which queue_id is bound to which umem and make sure that two different umems cannot be bound to the same queue_id.
Patch 3: Prep patch for ethtool_set_channels.
Patch 4: Forbid decreasing the number of active queues if a deactivated queue has an AF_XDP socket.
Patch 5: Simplify xdp_clear_umem_at_qid now that ethtool cannot deactivate the queue id we are running on.
====================

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
commit df1ea77bc5
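To make the bind semantics described above concrete, here is a minimal user-space sketch of the scenario the series fixes. It is illustrative only: the interface name, queue id, and the assumption that a umem and at least one ring were already registered on each socket are not part of this commit.

/* Hypothetical example: two AF_XDP sockets on the same queue id, one in
 * copy mode and one forced to zero-copy mode. With this series, the second
 * bind is rejected (-EBUSY) instead of silently mixing modes on one queue.
 * Assumes each socket already has a umem and at least one ring configured
 * via the XDP_UMEM_REG / XDP_RX_RING setsockopts (not shown).
 */
#include <linux/if_xdp.h>
#include <net/if.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

static int bind_xsk(int fd, unsigned int ifindex, __u32 queue_id, __u16 flags)
{
        struct sockaddr_xdp sxdp = {};

        sxdp.sxdp_family = AF_XDP;
        sxdp.sxdp_ifindex = ifindex;
        sxdp.sxdp_queue_id = queue_id;
        sxdp.sxdp_flags = flags;        /* XDP_COPY or XDP_ZEROCOPY */

        return bind(fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
}

int main(void)
{
        unsigned int ifindex = if_nametoindex("eth0");  /* assumed NIC name */
        int fd1 = socket(AF_XDP, SOCK_RAW, 0);
        int fd2 = socket(AF_XDP, SOCK_RAW, 0);

        if (bind_xsk(fd1, ifindex, 0, XDP_COPY))
                perror("copy-mode bind");

        /* Second umem on the same queue id, now in zero-copy mode. */
        if (bind_xsk(fd2, ifindex, 0, XDP_ZEROCOPY))
                perror("zero-copy bind");       /* expected to fail now */

        close(fd2);
        close(fd1);
        return 0;
}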
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -609,6 +609,9 @@ struct netdev_queue {

 	/* Subordinate device that the queue has been assigned to */
 	struct net_device	*sb_dev;
+#ifdef CONFIG_XDP_SOCKETS
+	struct xdp_umem *umem;
+#endif
 /*
  * write-mostly part
  */
@@ -738,6 +741,9 @@ struct netdev_rx_queue {
 	struct kobject			kobj;
 	struct net_device		*dev;
 	struct xdp_rxq_info		xdp_rxq;
+#ifdef CONFIG_XDP_SOCKETS
+	struct xdp_umem			*umem;
+#endif
 } ____cacheline_aligned_in_smp;

 /*
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -86,6 +86,7 @@ struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries);
 struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem,
 					  struct xdp_umem_fq_reuse *newq);
 void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq);
+struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, u16 queue_id);

 static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
 {
@@ -183,6 +184,12 @@ static inline void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq)
 {
 }

+static inline struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
+						     u16 queue_id)
+{
+	return NULL;
+}
+
 static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
 {
 	return NULL;
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -27,6 +27,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/sched/signal.h>
 #include <linux/net.h>
+#include <net/xdp_sock.h>

 /*
  * Some useful ethtool_ops methods that're device independent.
@@ -1655,8 +1656,10 @@ static noinline_for_stack int ethtool_get_channels(struct net_device *dev,
 static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
 						   void __user *useraddr)
 {
-	struct ethtool_channels channels, max = { .cmd = ETHTOOL_GCHANNELS };
+	struct ethtool_channels channels, curr = { .cmd = ETHTOOL_GCHANNELS };
+	u16 from_channel, to_channel;
 	u32 max_rx_in_use = 0;
+	unsigned int i;

 	if (!dev->ethtool_ops->set_channels || !dev->ethtool_ops->get_channels)
 		return -EOPNOTSUPP;
@@ -1664,13 +1667,13 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
 	if (copy_from_user(&channels, useraddr, sizeof(channels)))
 		return -EFAULT;

-	dev->ethtool_ops->get_channels(dev, &max);
+	dev->ethtool_ops->get_channels(dev, &curr);

 	/* ensure new counts are within the maximums */
-	if ((channels.rx_count > max.max_rx) ||
-	    (channels.tx_count > max.max_tx) ||
-	    (channels.combined_count > max.max_combined) ||
-	    (channels.other_count > max.max_other))
+	if (channels.rx_count > curr.max_rx ||
+	    channels.tx_count > curr.max_tx ||
+	    channels.combined_count > curr.max_combined ||
+	    channels.other_count > curr.max_other)
 		return -EINVAL;

 	/* ensure the new Rx count fits within the configured Rx flow
@@ -1680,6 +1683,14 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
 	    (channels.combined_count + channels.rx_count) <= max_rx_in_use)
 		return -EINVAL;

+	/* Disabling channels, query zero-copy AF_XDP sockets */
+	from_channel = channels.combined_count +
+		       min(channels.rx_count, channels.tx_count);
+	to_channel = curr.combined_count + max(curr.rx_count, curr.tx_count);
+	for (i = from_channel; i < to_channel; i++)
+		if (xdp_get_umem_from_qid(dev, i))
+			return -EINVAL;
+
 	return dev->ethtool_ops->set_channels(dev, &channels);
 }

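As a worked illustration of the queue range scanned by the loop added above (the numbers are hypothetical): on a device currently running with curr.combined_count = 8 and no dedicated rx/tx channels, a request for channels.combined_count = 4 gives from_channel = 4 + min(0, 0) = 4 and to_channel = 8 + max(0, 0) = 8, so queue ids 4 through 7 are about to be deactivated. If xdp_get_umem_from_qid() finds a umem bound to any of them, ethtool_set_channels() now returns -EINVAL instead of pulling the queue out from under an AF_XDP socket.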
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -42,27 +42,44 @@ void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
 	}
 }

-int xdp_umem_query(struct net_device *dev, u16 queue_id)
+/* The umem is stored both in the _rx struct and the _tx struct as we do
+ * not know if the device has more tx queues than rx, or the opposite.
+ * This might also change during run time.
+ */
+static void xdp_reg_umem_at_qid(struct net_device *dev, struct xdp_umem *umem,
+				u16 queue_id)
 {
-	struct netdev_bpf bpf;
+	if (queue_id < dev->real_num_rx_queues)
+		dev->_rx[queue_id].umem = umem;
+	if (queue_id < dev->real_num_tx_queues)
+		dev->_tx[queue_id].umem = umem;
+}

-	ASSERT_RTNL();
+struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
+				       u16 queue_id)
+{
+	if (queue_id < dev->real_num_rx_queues)
+		return dev->_rx[queue_id].umem;
+	if (queue_id < dev->real_num_tx_queues)
+		return dev->_tx[queue_id].umem;

-	memset(&bpf, 0, sizeof(bpf));
-	bpf.command = XDP_QUERY_XSK_UMEM;
-	bpf.xsk.queue_id = queue_id;
+	return NULL;
+}

-	if (!dev->netdev_ops->ndo_bpf)
-		return 0;
-	return dev->netdev_ops->ndo_bpf(dev, &bpf) ?: !!bpf.xsk.umem;
+static void xdp_clear_umem_at_qid(struct net_device *dev, u16 queue_id)
+{
+	if (queue_id < dev->real_num_rx_queues)
+		dev->_rx[queue_id].umem = NULL;
+	if (queue_id < dev->real_num_tx_queues)
+		dev->_tx[queue_id].umem = NULL;
 }

 int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
-			u32 queue_id, u16 flags)
+			u16 queue_id, u16 flags)
 {
 	bool force_zc, force_copy;
 	struct netdev_bpf bpf;
-	int err;
+	int err = 0;

 	force_zc = flags & XDP_ZEROCOPY;
 	force_copy = flags & XDP_COPY;
@@ -70,17 +87,23 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
 	if (force_zc && force_copy)
 		return -EINVAL;

-	if (force_copy)
-		return 0;
-
-	if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_async_xmit)
-		return force_zc ? -EOPNOTSUPP : 0; /* fail or fallback */
-
 	rtnl_lock();
-	err = xdp_umem_query(dev, queue_id);
-	if (err) {
-		err = err < 0 ? -EOPNOTSUPP : -EBUSY;
-		goto err_rtnl_unlock;
+	if (xdp_get_umem_from_qid(dev, queue_id)) {
+		err = -EBUSY;
+		goto out_rtnl_unlock;
+	}
+
+	xdp_reg_umem_at_qid(dev, umem, queue_id);
+	umem->dev = dev;
+	umem->queue_id = queue_id;
+	if (force_copy)
+		/* For copy-mode, we are done. */
+		goto out_rtnl_unlock;
+
+	if (!dev->netdev_ops->ndo_bpf ||
+	    !dev->netdev_ops->ndo_xsk_async_xmit) {
+		err = -EOPNOTSUPP;
+		goto err_unreg_umem;
 	}

 	bpf.command = XDP_SETUP_XSK_UMEM;
@@ -89,18 +112,20 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,

 	err = dev->netdev_ops->ndo_bpf(dev, &bpf);
 	if (err)
-		goto err_rtnl_unlock;
+		goto err_unreg_umem;
 	rtnl_unlock();

 	dev_hold(dev);
-	umem->dev = dev;
-	umem->queue_id = queue_id;
 	umem->zc = true;
 	return 0;

-err_rtnl_unlock:
+err_unreg_umem:
+	xdp_clear_umem_at_qid(dev, queue_id);
+	if (!force_zc)
+		err = 0; /* fallback to copy mode */
+out_rtnl_unlock:
 	rtnl_unlock();
-	return force_zc ? err : 0; /* fail or fallback */
+	return err;
 }

 static void xdp_umem_clear_dev(struct xdp_umem *umem)
@@ -108,7 +133,7 @@ static void xdp_umem_clear_dev(struct xdp_umem *umem)
 	struct netdev_bpf bpf;
 	int err;

-	if (umem->dev) {
+	if (umem->zc) {
 		bpf.command = XDP_SETUP_XSK_UMEM;
 		bpf.xsk.umem = NULL;
 		bpf.xsk.queue_id = umem->queue_id;
@@ -119,9 +144,17 @@ static void xdp_umem_clear_dev(struct xdp_umem *umem)

 		if (err)
 			WARN(1, "failed to disable umem!\n");
+	}
+
+	if (umem->dev) {
+		rtnl_lock();
+		xdp_clear_umem_at_qid(umem->dev, umem->queue_id);
+		rtnl_unlock();
+	}

+	if (umem->zc) {
 		dev_put(umem->dev);
-		umem->dev = NULL;
+		umem->zc = false;
 	}
 }

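The cover letter notes that drivers will later be able to look up the umem for a queue themselves via xdp_get_umem_from_qid(), instead of tracking it through the NDO. A rough sketch of that possible future driver-side use follows; the function name and the decision it makes are made up for illustration and are not part of this series.

#include <linux/netdevice.h>
#include <net/xdp_sock.h>

/* Hypothetical driver helper (not part of this series): decide at ring
 * setup time whether queue 'qid' should run the zero-copy AF_XDP path.
 * Assumes the caller holds rtnl, as the callers added in this series do.
 */
static bool example_ring_wants_zc(struct net_device *netdev, u16 qid)
{
        struct xdp_umem *umem = xdp_get_umem_from_qid(netdev, qid);

        if (!umem)
                return false;   /* regular page-based Rx/Tx setup */

        /* A umem is bound to this queue id; the driver could program the
         * hardware ring to use the umem's frames instead of its own pages.
         */
        return true;
}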
--- a/net/xdp/xdp_umem.h
+++ b/net/xdp/xdp_umem.h
@@ -9,7 +9,7 @@
 #include <net/xdp_sock.h>

 int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
-			u32 queue_id, u16 flags);
+			u16 queue_id, u16 flags);
 bool xdp_umem_validate_queues(struct xdp_umem *umem);
 void xdp_get_umem(struct xdp_umem *umem);
 void xdp_put_umem(struct xdp_umem *umem);
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -419,13 +419,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 	}

 	qid = sxdp->sxdp_queue_id;
-
-	if ((xs->rx && qid >= dev->real_num_rx_queues) ||
-	    (xs->tx && qid >= dev->real_num_tx_queues)) {
-		err = -EINVAL;
-		goto out_unlock;
-	}
-
 	flags = sxdp->sxdp_flags;

 	if (flags & XDP_SHARED_UMEM) {