netfilter: nf_tables: Add flowtable map for xdp offload

This adds a small internal mapping table so that a new bpf (xdp) kfunc
can perform lookups in a flowtable.

As-is, an XDP program has access to the device pointer, but no way to do a
lookup in a flowtable -- there is no way to obtain the needed struct
without questionable stunts.

This makes it possible to obtain an nf_flowtable pointer given a
net_device structure.

In order to keep backward compatibility, the infrastructure allows the
user to add a given device to multiple flowtables, but the lookup will
always return the first added mapping, since it assumes the correct
configuration is a 1:1 mapping between flowtables and net_devices.

Co-developed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Link: https://lore.kernel.org/bpf/9f20e2c36f494b3bf177328718367f636bb0b2ab.1719698275.git.lorenzo@kernel.org
This commit is contained in:
Florian Westphal 2024-06-30 00:26:48 +02:00 committed by Daniel Borkmann
parent a12978712d
commit 89cc8f1c5f
4 changed files with 154 additions and 2 deletions

View File

@ -305,6 +305,11 @@ struct flow_ports {
__be16 source, dest;
};
struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev);
int nf_flow_offload_xdp_setup(struct nf_flowtable *flowtable,
struct net_device *dev,
enum flow_block_command cmd);
unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state);
unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,

View File

@ -142,7 +142,7 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
# flow table infrastructure
obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \
nf_flow_table_offload.o
nf_flow_table_offload.o nf_flow_table_xdp.o
nf_flow_table-$(CONFIG_NF_FLOW_TABLE_PROCFS) += nf_flow_table_procfs.o
obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o

View File

@ -1192,7 +1192,7 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
int err;
if (!nf_flowtable_hw_offload(flowtable))
return 0;
return nf_flow_offload_xdp_setup(flowtable, dev, cmd);
if (dev->netdev_ops->ndo_setup_tc)
err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,

View File

@ -0,0 +1,147 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
/* One flowtable bound to a device; chained off flow_offload_xdp::head
 * and freed via kfree_rcu() using @rcuhead.
 */
struct flow_offload_xdp_ft {
	struct list_head head;
	struct nf_flowtable *ft;
	struct rcu_head rcuhead;
};

/* Hashtable entry for one net_device, keyed by the device pointer value
 * (@net_device_addr); @head lists all flowtables bound to the device.
 */
struct flow_offload_xdp {
	struct hlist_node hnode;
	unsigned long net_device_addr;
	struct list_head head;
};

/* 2^4 = 16 buckets; the map is expected to stay small (one entry per
 * offloaded device).
 */
#define NF_XDP_HT_BITS 4
static DEFINE_HASHTABLE(nf_xdp_hashtable, NF_XDP_HT_BITS);
/* Serializes all writers (insert/remove); readers rely on RCU only. */
static DEFINE_MUTEX(nf_xdp_hashtable_lock);
/* Resolve the flowtable bound to @dev, or NULL if none is registered.
 *
 * Caller must hold the RCU read lock.
 */
struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev)
{
	unsigned long key = (unsigned long)dev;
	struct flow_offload_xdp *entry;

	hash_for_each_possible_rcu(nf_xdp_hashtable, entry, hnode, key) {
		struct flow_offload_xdp_ft *ft_elem;

		if (entry->net_device_addr != key)
			continue;

		/* The user is supposed to insert a given net_device
		 * just into a single nf_flowtable so we always return
		 * the first element here.
		 */
		ft_elem = list_first_or_null_rcu(&entry->head,
						 struct flow_offload_xdp_ft,
						 head);
		return ft_elem ? ft_elem->ft : NULL;
	}

	return NULL;
}
/* Bind @ft to @dev in the xdp lookup map.
 *
 * A hashtable entry for @dev is created on first use; further bindings
 * for the same device are appended to its flowtable list. The entry is
 * published with hash_add_rcu() before the list element is linked, so
 * concurrent readers either miss the device or see a valid (possibly
 * still empty) list.
 *
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int nf_flowtable_by_dev_insert(struct nf_flowtable *ft,
				      const struct net_device *dev)
{
	unsigned long key = (unsigned long)dev;
	struct flow_offload_xdp *entry, *found = NULL;
	struct flow_offload_xdp_ft *ft_elem;
	int err = 0;

	ft_elem = kzalloc(sizeof(*ft_elem), GFP_KERNEL_ACCOUNT);
	if (!ft_elem)
		return -ENOMEM;

	ft_elem->ft = ft;

	mutex_lock(&nf_xdp_hashtable_lock);

	/* Reuse the device's entry if it is already hashed. */
	hash_for_each_possible(nf_xdp_hashtable, entry, hnode, key) {
		if (entry->net_device_addr == key) {
			found = entry;
			break;
		}
	}

	if (!found) {
		found = kzalloc(sizeof(*found), GFP_KERNEL_ACCOUNT);
		if (!found) {
			err = -ENOMEM;
			goto out_unlock;
		}

		found->net_device_addr = key;
		INIT_LIST_HEAD(&found->head);

		hash_add_rcu(nf_xdp_hashtable, &found->hnode, key);
	}

	list_add_tail_rcu(&ft_elem->head, &found->head);

out_unlock:
	mutex_unlock(&nf_xdp_hashtable_lock);
	if (err)
		kfree(ft_elem);
	return err;
}
/* Unbind @ft from @dev: drop every list element referencing @ft and,
 * if the device's flowtable list becomes empty, unhash and free the
 * per-device entry itself.
 *
 * List elements are freed via kfree_rcu(); the per-device entry has no
 * rcu_head, so it is freed after an explicit synchronize_rcu() once the
 * lock is dropped. @elem doubles as the "needs deferred free" flag: it
 * is reset to NULL when the entry stays hashed.
 */
static void nf_flowtable_by_dev_remove(struct nf_flowtable *ft,
				       const struct net_device *dev)
{
	struct flow_offload_xdp *iter, *elem = NULL;
	/* Same key derivation as insert/lookup: the device pointer value. */
	unsigned long key = (unsigned long)dev;

	mutex_lock(&nf_xdp_hashtable_lock);
	hash_for_each_possible(nf_xdp_hashtable, iter, hnode, key) {
		if (key == iter->net_device_addr) {
			elem = iter;
			break;
		}
	}
	if (elem) {
		struct flow_offload_xdp_ft *ft_elem, *ft_next;

		/* _safe variant: entries may be deleted while iterating. */
		list_for_each_entry_safe(ft_elem, ft_next, &elem->head, head) {
			if (ft_elem->ft == ft) {
				list_del_rcu(&ft_elem->head);
				kfree_rcu(ft_elem, rcuhead);
			}
		}
		if (list_empty(&elem->head))
			hash_del_rcu(&elem->hnode);
		else
			elem = NULL; /* still in use; keep it hashed */
	}
	mutex_unlock(&nf_xdp_hashtable_lock);
	if (elem) {
		/* Wait for concurrent lookups to finish before freeing. */
		synchronize_rcu();
		kfree(elem);
	}
}
/* flow_block bind/unbind entry point for the xdp offload map.
 *
 * BIND registers @dev -> @flowtable for nf_flowtable_by_dev() lookups;
 * UNBIND removes the mapping. Any other command is a caller bug: warn
 * once and report success so offload setup continues.
 */
int nf_flow_offload_xdp_setup(struct nf_flowtable *flowtable,
			      struct net_device *dev,
			      enum flow_block_command cmd)
{
	if (cmd == FLOW_BLOCK_BIND)
		return nf_flowtable_by_dev_insert(flowtable, dev);

	if (cmd == FLOW_BLOCK_UNBIND) {
		nf_flowtable_by_dev_remove(flowtable, dev);
		return 0;
	}

	WARN_ON_ONCE(1);
	return 0;
}