net: Generic XDP
This provides a generic, SKB-based, non-optimized XDP path which is used if either the driver lacks a specific XDP implementation or the user requests it via a new IFLA_XDP_FLAGS value named XDP_FLAGS_SKB_MODE.

It is arguable that perhaps I should have required something like this as part of the initial XDP feature merge. I believe this is critical for two reasons:

1) Accessibility. More people can play with XDP with fewer dependencies. Yes, I know we have XDP support in virtio_net, but that just creates another dependency for learning how to use this facility. I wrote this to make life easier for XDP newbies.

2) As a model for what the expected semantics are. If there is a pure generic core implementation, it serves as a semantic example for driver folks adding XDP support.

One thing I have not tried to address here is the issue of XDP_PACKET_HEADROOM, thanks to Daniel for spotting that. It seems incredibly expensive to do a skb_cow(skb, XDP_PACKET_HEADROOM) or whatever even if the XDP program doesn't try to push headers at all. I think we really need the verifier to somehow propagate whether certain XDP helpers are used or not.

v5:
 - Handle both negative and positive offset after running prog
 - Fix MAC length in XDP_TX case (Alexei)
 - Use rcu_dereference_protected() in free_netdev (kbuild test robot)

v4:
 - Fix MAC header adjustment before calling prog (David Ahern)
 - Disable LRO when generic XDP is installed (Michael Chan)
 - Bypass qdisc et al. on XDP_TX and record the event (Alexei)
 - Do not perform generic XDP on reinjected packets (DaveM)

v3:
 - Make sure XDP program sees packet at MAC header, push back MAC header if we do XDP_TX. (Alexei)
 - Elide GRO when generic XDP is in use. (Alexei)
 - Add XDP_FLAGS_SKB_MODE flag which the user can use to request generic XDP even if the driver has an XDP implementation. (Alexei)
 - Report whether SKB mode is in use in rtnl_xdp_fill() via XDP_FLAGS attribute. (Daniel)

v2:
 - Add some "fall through" comments in switch statements based upon feedback from Andrew Lunn
 - Use RCU for generic xdp_prog, thanks to Johannes Berg.

Tested-by: Andy Gospodarek <andy@greyhouse.net>
Tested-by: Jesper Dangaard Brouer <brouer@redhat.com>
Tested-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
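For readers new to XDP, the program run by this generic path is the same kind of object a driver-level XDP path would run. The following is a minimal sketch only; the file name, section name, and build/attach commands are illustrative and not part of this patch.

/* xdp_pass_kern.c -- minimal XDP program sketch (illustrative, not part of
 * this patch).  Build with: clang -O2 -target bpf -c xdp_pass_kern.c -o xdp_pass.o
 */
#include <linux/bpf.h>
#include <linux/if_ether.h>

__attribute__((section("prog"), used))
int xdp_pass(struct xdp_md *ctx)
{
	void *data     = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;
	struct ethhdr *eth = data;

	/* The verifier requires a bounds check before the header is read.
	 * Both in-driver and generic XDP hand the program the packet
	 * starting at the MAC header.
	 */
	if ((void *)(eth + 1) > data_end)
		return XDP_DROP;

	return XDP_PASS;
}

Such an object can be attached in SKB mode by passing XDP_FLAGS_SKB_MODE in the IFLA_XDP_FLAGS netlink attribute alongside the program fd; a sufficiently recent iproute2 exposes this as something like "ip link set dev eth0 xdpgeneric obj xdp_pass.o", though the exact tooling syntax is outside the scope of this change.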
parent 2f7878c06e
commit b5cdae3291
@@ -1905,9 +1905,17 @@ struct net_device {
	struct lock_class_key	*qdisc_tx_busylock;
	struct lock_class_key	*qdisc_running_key;
	bool			proto_down;
	struct bpf_prog __rcu	*xdp_prog;
};
#define to_net_dev(d) container_of(d, struct net_device, dev)

static inline bool netif_elide_gro(const struct net_device *dev)
{
	if (!(dev->features & NETIF_F_GRO) || dev->xdp_prog)
		return true;
	return false;
}

#define	NETDEV_ALIGN		32

static inline
@@ -887,7 +887,9 @@ enum {
/* XDP section */

#define XDP_FLAGS_UPDATE_IF_NOEXIST	(1U << 0)
#define XDP_FLAGS_MASK			(XDP_FLAGS_UPDATE_IF_NOEXIST)
#define XDP_FLAGS_SKB_MODE		(2U << 0)
#define XDP_FLAGS_MASK			(XDP_FLAGS_UPDATE_IF_NOEXIST | \
					 XDP_FLAGS_SKB_MODE)

enum {
	IFLA_XDP_UNSPEC,

net/core/dev.c | 155
@@ -95,6 +95,7 @@
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/busy_poll.h>
@@ -4251,6 +4252,125 @@ static int __netif_receive_skb(struct sk_buff *skb)
	return ret;
}

static struct static_key generic_xdp_needed __read_mostly;

static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
{
	struct bpf_prog *new = xdp->prog;
	int ret = 0;

	switch (xdp->command) {
	case XDP_SETUP_PROG: {
		struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);

		rcu_assign_pointer(dev->xdp_prog, new);
		if (old)
			bpf_prog_put(old);

		if (old && !new) {
			static_key_slow_dec(&generic_xdp_needed);
		} else if (new && !old) {
			static_key_slow_inc(&generic_xdp_needed);
			dev_disable_lro(dev);
		}
		break;
	}

	case XDP_QUERY_PROG:
		xdp->prog_attached = !!rcu_access_pointer(dev->xdp_prog);
		break;

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

static u32 netif_receive_generic_xdp(struct sk_buff *skb,
				     struct bpf_prog *xdp_prog)
{
	struct xdp_buff xdp;
	u32 act = XDP_DROP;
	void *orig_data;
	int hlen, off;
	u32 mac_len;

	/* Reinjected packets coming from act_mirred or similar should
	 * not get XDP generic processing.
	 */
	if (skb_cloned(skb))
		return XDP_PASS;

	if (skb_linearize(skb))
		goto do_drop;

	/* The XDP program wants to see the packet starting at the MAC
	 * header.
	 */
	mac_len = skb->data - skb_mac_header(skb);
	hlen = skb_headlen(skb) + mac_len;
	xdp.data = skb->data - mac_len;
	xdp.data_end = xdp.data + hlen;
	xdp.data_hard_start = skb->data - skb_headroom(skb);
	orig_data = xdp.data;

	act = bpf_prog_run_xdp(xdp_prog, &xdp);

	off = xdp.data - orig_data;
	if (off > 0)
		__skb_pull(skb, off);
	else if (off < 0)
		__skb_push(skb, -off);

	switch (act) {
	case XDP_TX:
		__skb_push(skb, mac_len);
		/* fall through */
	case XDP_PASS:
		break;

	default:
		bpf_warn_invalid_xdp_action(act);
		/* fall through */
	case XDP_ABORTED:
		trace_xdp_exception(skb->dev, xdp_prog, act);
		/* fall through */
	case XDP_DROP:
	do_drop:
		kfree_skb(skb);
		break;
	}

	return act;
}

/* When doing generic XDP we have to bypass the qdisc layer and the
 * network taps in order to match in-driver-XDP behavior.
 */
static void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
{
	struct net_device *dev = skb->dev;
	struct netdev_queue *txq;
	bool free_skb = true;
	int cpu, rc;

	txq = netdev_pick_tx(dev, skb, NULL);
	cpu = smp_processor_id();
	HARD_TX_LOCK(dev, txq, cpu);
	if (!netif_xmit_stopped(txq)) {
		rc = netdev_start_xmit(skb, dev, txq, 0);
		if (dev_xmit_complete(rc))
			free_skb = false;
	}
	HARD_TX_UNLOCK(dev, txq);
	if (free_skb) {
		trace_xdp_exception(dev, xdp_prog, XDP_TX);
		kfree_skb(skb);
	}
}

static int netif_receive_skb_internal(struct sk_buff *skb)
{
	int ret;
@@ -4262,6 +4382,21 @@ static int netif_receive_skb_internal(struct sk_buff *skb)

	rcu_read_lock();

	if (static_key_false(&generic_xdp_needed)) {
		struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog);

		if (xdp_prog) {
			u32 act = netif_receive_generic_xdp(skb, xdp_prog);

			if (act != XDP_PASS) {
				rcu_read_unlock();
				if (act == XDP_TX)
					generic_xdp_tx(skb, xdp_prog);
				return NET_RX_DROP;
			}
		}
	}

#ifdef CONFIG_RPS
	if (static_key_false(&rps_needed)) {
		struct rps_dev_flow voidflow, *rflow = &voidflow;
@@ -4494,7 +4629,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
	enum gro_result ret;
	int grow;

	if (!(skb->dev->features & NETIF_F_GRO))
	if (netif_elide_gro(skb->dev))
		goto normal;

	if (skb->csum_bad)
@@ -6723,6 +6858,7 @@ EXPORT_SYMBOL(dev_change_proto_down);
 */
int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
{
	int (*xdp_op)(struct net_device *dev, struct netdev_xdp *xdp);
	const struct net_device_ops *ops = dev->netdev_ops;
	struct bpf_prog *prog = NULL;
	struct netdev_xdp xdp;
@@ -6730,14 +6866,16 @@ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)

	ASSERT_RTNL();

	if (!ops->ndo_xdp)
		return -EOPNOTSUPP;
	xdp_op = ops->ndo_xdp;
	if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE))
		xdp_op = generic_xdp_install;

	if (fd >= 0) {
		if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST) {
			memset(&xdp, 0, sizeof(xdp));
			xdp.command = XDP_QUERY_PROG;

			err = ops->ndo_xdp(dev, &xdp);
			err = xdp_op(dev, &xdp);
			if (err < 0)
				return err;
			if (xdp.prog_attached)
@@ -6753,7 +6891,7 @@ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
	xdp.command = XDP_SETUP_PROG;
	xdp.prog = prog;

	err = ops->ndo_xdp(dev, &xdp);
	err = xdp_op(dev, &xdp);
	if (err < 0 && prog)
		bpf_prog_put(prog);

@@ -7793,6 +7931,7 @@ EXPORT_SYMBOL(alloc_netdev_mqs);
void free_netdev(struct net_device *dev)
{
	struct napi_struct *p, *n;
	struct bpf_prog *prog;

	might_sleep();
	netif_free_tx_queues(dev);
@@ -7811,6 +7950,12 @@ void free_netdev(struct net_device *dev)
	free_percpu(dev->pcpu_refcnt);
	dev->pcpu_refcnt = NULL;

	prog = rcu_dereference_protected(dev->xdp_prog, 1);
	if (prog) {
		bpf_prog_put(prog);
		static_key_slow_dec(&generic_xdp_needed);
	}

	/*  Compatibility with error handling in drivers */
	if (dev->reg_state == NETREG_UNINITIALIZED) {
		netdev_freemem(dev);
@@ -13,7 +13,7 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
	struct net_device *dev = skb->dev;
	struct gro_cell *cell;

	if (!gcells->cells || skb_cloned(skb) || !(dev->features & NETIF_F_GRO))
	if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev))
		return netif_rx(skb);

	cell = this_cpu_ptr(gcells->cells);
@@ -896,15 +896,13 @@ static size_t rtnl_port_size(const struct net_device *dev,
		return port_self_size;
}

static size_t rtnl_xdp_size(const struct net_device *dev)
static size_t rtnl_xdp_size(void)
{
	size_t xdp_size = nla_total_size(0) +	/* nest IFLA_XDP */
			  nla_total_size(1);	/* XDP_ATTACHED */
			  nla_total_size(1) +	/* XDP_ATTACHED */
			  nla_total_size(4);	/* XDP_FLAGS */

	if (!dev->netdev_ops->ndo_xdp)
		return 0;
	else
		return xdp_size;
	return xdp_size;
}

static noinline size_t if_nlmsg_size(const struct net_device *dev,
@@ -943,7 +941,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
	       + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */
	       + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
	       + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
	       + rtnl_xdp_size(dev) /* IFLA_XDP */
	       + rtnl_xdp_size() /* IFLA_XDP */
	       + nla_total_size(1); /* IFLA_PROTO_DOWN */

}
@@ -1251,23 +1249,35 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)

static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
{
	struct netdev_xdp xdp_op = {};
	struct nlattr *xdp;
	u32 xdp_flags = 0;
	u8 val = 0;
	int err;

	if (!dev->netdev_ops->ndo_xdp)
		return 0;
	xdp = nla_nest_start(skb, IFLA_XDP);
	if (!xdp)
		return -EMSGSIZE;
	xdp_op.command = XDP_QUERY_PROG;
	err = dev->netdev_ops->ndo_xdp(dev, &xdp_op);
	if (err)
		goto err_cancel;
	err = nla_put_u8(skb, IFLA_XDP_ATTACHED, xdp_op.prog_attached);
	if (rcu_access_pointer(dev->xdp_prog)) {
		xdp_flags = XDP_FLAGS_SKB_MODE;
		val = 1;
	} else if (dev->netdev_ops->ndo_xdp) {
		struct netdev_xdp xdp_op = {};

		xdp_op.command = XDP_QUERY_PROG;
		err = dev->netdev_ops->ndo_xdp(dev, &xdp_op);
		if (err)
			goto err_cancel;
		val = xdp_op.prog_attached;
	}
	err = nla_put_u8(skb, IFLA_XDP_ATTACHED, val);
	if (err)
		goto err_cancel;

	if (xdp_flags) {
		err = nla_put_u32(skb, IFLA_XDP_FLAGS, xdp_flags);
		if (err)
			goto err_cancel;
	}
	nla_nest_end(skb, xdp);
	return 0;
