diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index e68c8eb4ea8e..4e2caaf8b5da 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -135,7 +135,7 @@ struct vxlan_dev { __u16 port_max; __u8 tos; /* TOS override */ __u8 ttl; - u32 flags; /* VXLAN_F_* below */ + u32 flags; /* VXLAN_F_* in vxlan.h */ struct work_struct sock_work; struct work_struct igmp_join; @@ -150,13 +150,6 @@ struct vxlan_dev { struct hlist_head fdb_head[FDB_HASH_SIZE]; }; -#define VXLAN_F_LEARN 0x01 -#define VXLAN_F_PROXY 0x02 -#define VXLAN_F_RSC 0x04 -#define VXLAN_F_L2MISS 0x08 -#define VXLAN_F_L3MISS 0x10 -#define VXLAN_F_IPV6 0x20 /* internal flag */ - /* salt for hash table */ static u32 vxlan_salt __read_mostly; static struct workqueue_struct *vxlan_wq; @@ -1601,18 +1594,11 @@ __be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(vxlan_src_port); -static int handle_offloads(struct sk_buff *skb) +static inline struct sk_buff *vxlan_handle_offloads(struct sk_buff *skb, + bool udp_csum) { - if (skb_is_gso(skb)) { - int err = skb_unclone(skb, GFP_ATOMIC); - if (unlikely(err)) - return err; - - skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; - } else if (skb->ip_summed != CHECKSUM_PARTIAL) - skb->ip_summed = CHECKSUM_NONE; - - return 0; + int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; + return iptunnel_handle_offloads(skb, udp_csum, type); } #if IS_ENABLED(CONFIG_IPV6) @@ -1629,10 +1615,9 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, int min_headroom; int err; - if (!skb->encapsulation) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } + skb = vxlan_handle_offloads(skb, !udp_get_no_check6_tx(vs->sock->sk)); + if (IS_ERR(skb)) + return -EINVAL; skb_scrub_packet(skb, xnet); @@ -1666,27 +1651,14 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, uh->source = src_port; uh->len = htons(skb->len); - uh->check = 0; memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); skb_dst_set(skb, dst); - if (!skb_is_gso(skb) && !(dst->dev->features & NETIF_F_IPV6_CSUM)) { - __wsum csum = skb_checksum(skb, 0, skb->len, 0); - skb->ip_summed = CHECKSUM_UNNECESSARY; - uh->check = csum_ipv6_magic(saddr, daddr, skb->len, - IPPROTO_UDP, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - } else { - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - uh->check = ~csum_ipv6_magic(saddr, daddr, - skb->len, IPPROTO_UDP, 0); - } + udp6_set_csum(udp_get_no_check6_tx(vs->sock->sk), skb, + saddr, daddr, skb->len); __skb_push(skb, sizeof(*ip6h)); skb_reset_network_header(skb); @@ -1702,10 +1674,6 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, ip6h->daddr = *daddr; ip6h->saddr = *saddr; - err = handle_offloads(skb); - if (err) - return err; - ip6tunnel_xmit(skb, dev); return 0; } @@ -1721,10 +1689,9 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, int min_headroom; int err; - if (!skb->encapsulation) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } + skb = vxlan_handle_offloads(skb, !vs->sock->sk->sk_no_check_tx); + if (IS_ERR(skb)) + return -EINVAL; min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + VXLAN_HLEN + sizeof(struct iphdr) @@ -1756,11 +1723,9 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, uh->source = src_port; uh->len = htons(skb->len); - uh->check = 0; - err = handle_offloads(skb); - if (err) - return err; + udp_set_csum(vs->sock->sk->sk_no_check_tx, skb, + src, dst, skb->len); return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet); @@ -2405,7 +2370,7 @@ static void vxlan_del_work(struct work_struct *work) * could be used for both IPv4 and IPv6 communications, but * users may set bindv6only=1. */ -static struct socket *create_v6_sock(struct net *net, __be16 port) +static struct socket *create_v6_sock(struct net *net, __be16 port, u32 flags) { struct sock *sk; struct socket *sock; @@ -2442,18 +2407,25 @@ static struct socket *create_v6_sock(struct net *net, __be16 port) /* Disable multicast loopback */ inet_sk(sk)->mc_loop = 0; + + if (flags & VXLAN_F_UDP_ZERO_CSUM6_TX) + udp_set_no_check6_tx(sk, true); + + if (flags & VXLAN_F_UDP_ZERO_CSUM6_RX) + udp_set_no_check6_rx(sk, true); + return sock; } #else -static struct socket *create_v6_sock(struct net *net, __be16 port) +static struct socket *create_v6_sock(struct net *net, __be16 port, u32 flags) { return ERR_PTR(-EPFNOSUPPORT); } #endif -static struct socket *create_v4_sock(struct net *net, __be16 port) +static struct socket *create_v4_sock(struct net *net, __be16 port, u32 flags) { struct sock *sk; struct socket *sock; @@ -2486,18 +2458,24 @@ static struct socket *create_v4_sock(struct net *net, __be16 port) /* Disable multicast loopback */ inet_sk(sk)->mc_loop = 0; + + if (!(flags & VXLAN_F_UDP_CSUM)) + sock->sk->sk_no_check_tx = 1; + return sock; } /* Create new listen socket if needed */ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, - vxlan_rcv_t *rcv, void *data, bool ipv6) + vxlan_rcv_t *rcv, void *data, + u32 flags) { struct vxlan_net *vn = net_generic(net, vxlan_net_id); struct vxlan_sock *vs; struct socket *sock; struct sock *sk; unsigned int h; + bool ipv6 = !!(flags & VXLAN_F_IPV6); vs = kzalloc(sizeof(*vs), GFP_KERNEL); if (!vs) @@ -2509,9 +2487,9 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, INIT_WORK(&vs->del_work, vxlan_del_work); if (ipv6) - sock = create_v6_sock(net, port); + sock = create_v6_sock(net, port, flags); else - sock = create_v4_sock(net, port); + sock = create_v4_sock(net, port, flags); if (IS_ERR(sock)) { kfree(vs); return ERR_CAST(sock); @@ -2549,12 +2527,12 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, vxlan_rcv_t *rcv, void *data, - bool no_share, bool ipv6) + bool no_share, u32 flags) { struct vxlan_net *vn = net_generic(net, vxlan_net_id); struct vxlan_sock *vs; - vs = vxlan_socket_create(net, port, rcv, data, ipv6); + vs = vxlan_socket_create(net, port, rcv, data, flags); if (!IS_ERR(vs)) return vs; @@ -2587,7 +2565,7 @@ static void vxlan_sock_work(struct work_struct *work) __be16 port = vxlan->dst_port; struct vxlan_sock *nvs; - nvs = vxlan_sock_add(net, port, vxlan_rcv, NULL, false, vxlan->flags & VXLAN_F_IPV6); + nvs = vxlan_sock_add(net, port, vxlan_rcv, NULL, false, vxlan->flags); spin_lock(&vn->sock_lock); if (!IS_ERR(nvs)) vxlan_vs_add_dev(nvs, vxlan); @@ -2711,6 +2689,17 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, if (data[IFLA_VXLAN_PORT]) vxlan->dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]); + if (data[IFLA_VXLAN_UDP_CSUM] && nla_get_u8(data[IFLA_VXLAN_UDP_CSUM])) + vxlan->flags |= VXLAN_F_UDP_CSUM; + + if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX] && + nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX])) + vxlan->flags |= VXLAN_F_UDP_ZERO_CSUM6_TX; + + if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX] && + nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX])) + vxlan->flags |= VXLAN_F_UDP_ZERO_CSUM6_RX; + if (vxlan_find_vni(net, vni, vxlan->dst_port)) { pr_info("duplicate VNI %u\n", vni); return -EEXIST; @@ -2774,7 +2763,10 @@ static size_t vxlan_get_size(const struct net_device *dev) nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_AGEING */ nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LIMIT */ nla_total_size(sizeof(struct ifla_vxlan_port_range)) + - nla_total_size(sizeof(__be16))+ /* IFLA_VXLAN_PORT */ + nla_total_size(sizeof(__be16)) + /* IFLA_VXLAN_PORT */ + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_CSUM */ + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_TX */ + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_RX */ 0; } @@ -2834,7 +2826,13 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) !!(vxlan->flags & VXLAN_F_L3MISS)) || nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->age_interval) || nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->addrmax) || - nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->dst_port)) + nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->dst_port) || + nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM, + !!(vxlan->flags & VXLAN_F_UDP_CSUM)) || + nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX, + !!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) || + nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX, + !!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_RX))) goto nla_put_failure; if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports)) diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index c26d0ec2ef3a..e5a589435e2b 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -42,9 +42,11 @@ enum { NETIF_F_TSO6_BIT, /* ... TCPv6 segmentation */ NETIF_F_FSO_BIT, /* ... FCoE segmentation */ NETIF_F_GSO_GRE_BIT, /* ... GRE with TSO */ + NETIF_F_GSO_GRE_CSUM_BIT, /* ... GRE with csum with TSO */ NETIF_F_GSO_IPIP_BIT, /* ... IPIP tunnel with TSO */ NETIF_F_GSO_SIT_BIT, /* ... SIT tunnel with TSO */ NETIF_F_GSO_UDP_TUNNEL_BIT, /* ... UDP TUNNEL with TSO */ + NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT,/* ... UDP TUNNEL with TSO & CSUM */ NETIF_F_GSO_MPLS_BIT, /* ... MPLS segmentation */ /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ NETIF_F_GSO_MPLS_BIT, @@ -111,6 +113,7 @@ enum { #define NETIF_F_RXFCS __NETIF_F(RXFCS) #define NETIF_F_RXALL __NETIF_F(RXALL) #define NETIF_F_GSO_GRE __NETIF_F(GSO_GRE) +#define NETIF_F_GSO_GRE_CSUM __NETIF_F(GSO_GRE_CSUM) #define NETIF_F_GSO_IPIP __NETIF_F(GSO_IPIP) #define NETIF_F_GSO_SIT __NETIF_F(GSO_SIT) #define NETIF_F_GSO_UDP_TUNNEL __NETIF_F(GSO_UDP_TUNNEL) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7a9beeb1c458..c705808bef9c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -345,6 +345,10 @@ enum { SKB_GSO_UDP_TUNNEL = 1 << 9, SKB_GSO_MPLS = 1 << 10, + + SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11, + + SKB_GSO_GRE_CSUM = 1 << 12, }; #if BITS_PER_LONG > 32 @@ -567,7 +571,8 @@ struct sk_buff { * headers if needed */ __u8 encapsulation:1; - /* 6/8 bit hole (depending on ndisc_nodetype presence) */ + __u8 encap_hdr_csum:1; + /* 5/7 bit hole (depending on ndisc_nodetype presence) */ kmemcheck_bitfield_end(flags2); #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL @@ -2988,6 +2993,7 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb) struct skb_gso_cb { int mac_offset; int encap_level; + __u16 csum_start; }; #define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb) @@ -3012,6 +3018,28 @@ static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra) return 0; } +/* Compute the checksum for a gso segment. First compute the checksum value + * from the start of transport header to SKB_GSO_CB(skb)->csum_start, and + * then add in skb->csum (checksum from csum_start to end of packet). + * skb->csum and csum_start are then updated to reflect the checksum of the + * resultant packet starting from the transport header-- the resultant checksum + * is in the res argument (i.e. normally zero or ~ of checksum of a pseudo + * header. + */ +static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res) +{ + int plen = SKB_GSO_CB(skb)->csum_start - skb_headroom(skb) - + skb_transport_offset(skb); + __u16 csum; + + csum = csum_fold(csum_partial(skb_transport_header(skb), + plen, skb->csum)); + skb->csum = res; + SKB_GSO_CB(skb)->csum_start -= plen; + + return csum; +} + static inline bool skb_is_gso(const struct sk_buff *skb) { return skb_shinfo(skb)->gso_size; diff --git a/include/net/gre.h b/include/net/gre.h index 70046a0b0b89..b53182018743 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -37,9 +37,10 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, int hdr_len); static inline struct sk_buff *gre_handle_offloads(struct sk_buff *skb, - bool gre_csum) + bool csum) { - return iptunnel_handle_offloads(skb, gre_csum, SKB_GSO_GRE); + return iptunnel_handle_offloads(skb, csum, + csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); } diff --git a/include/net/ip6_checksum.h b/include/net/ip6_checksum.h index 8ac5c21f8456..55236cb71174 100644 --- a/include/net/ip6_checksum.h +++ b/include/net/ip6_checksum.h @@ -82,5 +82,17 @@ static inline void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) } #endif +static inline __sum16 udp_v6_check(int len, + const struct in6_addr *saddr, + const struct in6_addr *daddr, + __wsum base) +{ + return csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, base); +} + +void udp6_set_csum(bool nocheck, struct sk_buff *skb, + const struct in6_addr *saddr, + const struct in6_addr *daddr, int len); + int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto); #endif diff --git a/include/net/udp.h b/include/net/udp.h index 5eb86874bcd6..2ecfc6e15609 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -147,6 +147,15 @@ static inline __wsum udp_csum(struct sk_buff *skb) return csum; } +static inline __sum16 udp_v4_check(int len, __be32 saddr, + __be32 daddr, __wsum base) +{ + return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base); +} + +void udp_set_csum(bool nocheck, struct sk_buff *skb, + __be32 saddr, __be32 daddr, int len); + /* hash routines shared between UDPv4/6 and UDP-Litev4/6 */ static inline void udp_lib_hash(struct sock *sk) { diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 7bb4084b1bd0..12196ce661d9 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -24,9 +24,19 @@ struct vxlan_sock { struct udp_offload udp_offloads; }; +#define VXLAN_F_LEARN 0x01 +#define VXLAN_F_PROXY 0x02 +#define VXLAN_F_RSC 0x04 +#define VXLAN_F_L2MISS 0x08 +#define VXLAN_F_L3MISS 0x10 +#define VXLAN_F_IPV6 0x20 +#define VXLAN_F_UDP_CSUM 0x40 +#define VXLAN_F_UDP_ZERO_CSUM6_TX 0x80 +#define VXLAN_F_UDP_ZERO_CSUM6_RX 0x100 + struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, vxlan_rcv_t *rcv, void *data, - bool no_share, bool ipv6); + bool no_share, u32 flags); void vxlan_sock_release(struct vxlan_sock *vs); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 622e7910b8cc..b38534895db5 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -319,6 +319,9 @@ enum { IFLA_VXLAN_PORT, /* destination port */ IFLA_VXLAN_GROUP6, IFLA_VXLAN_LOCAL6, + IFLA_VXLAN_UDP_CSUM, + IFLA_VXLAN_UDP_ZERO_CSUM6_TX, + IFLA_VXLAN_UDP_ZERO_CSUM6_RX, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3f6c7e8be8a4..05f4bef2ce12 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2885,7 +2885,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, if (unlikely(!proto)) return ERR_PTR(-EINVAL); - csum = !!can_checksum_protocol(features, proto); + csum = !head_skb->encap_hdr_csum && + !!can_checksum_protocol(features, proto); + __skb_push(head_skb, doffset); headroom = skb_headroom(head_skb); pos = skb_headlen(head_skb); @@ -2983,6 +2985,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, nskb->csum = skb_copy_and_csum_bits(head_skb, offset, skb_put(nskb, len), len, 0); + SKB_GSO_CB(nskb)->csum_start = + skb_headroom(nskb) + offset; continue; } @@ -3052,6 +3056,8 @@ perform_csum_check: nskb->csum = skb_checksum(nskb, doffset, nskb->len - doffset, 0); nskb->ip_summed = CHECKSUM_NONE; + SKB_GSO_CB(nskb)->csum_start = + skb_headroom(nskb) + doffset; } } while ((offset += len) < head_skb->len); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 0e9bb08a91e4..d5e6836cf772 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1254,10 +1254,12 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | SKB_GSO_SIT | SKB_GSO_TCPV6 | SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_MPLS | 0))) goto out; diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index fbfd829f4049..4e9619bca732 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -84,7 +84,8 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, ptr--; } if (tpi->flags&TUNNEL_CSUM && - !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) { + !(skb_shinfo(skb)->gso_type & + (SKB_GSO_GRE|SKB_GSO_GRE_CSUM))) { *ptr = 0; *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, skb->len, 0)); diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index f1d32280cb54..24deb3928b9e 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -42,6 +42,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | SKB_GSO_IPIP))) goto out; @@ -55,6 +56,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, goto out; csum = !!(greh->flags & GRE_CSUM); + if (csum) + skb->encap_hdr_csum = 1; if (unlikely(!pskb_may_pull(skb, ghl))) goto out; @@ -94,10 +97,13 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, } } - greh = (struct gre_base_hdr *)(skb->data); + skb_reset_transport_header(skb); + + greh = (struct gre_base_hdr *) + skb_transport_header(skb); pcsum = (__be32 *)(greh + 1); *pcsum = 0; - *(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0)); + *(__sum16 *)pcsum = gso_make_checksum(skb, 0); } __skb_push(skb, tnl_hlen - ghl); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 847e69cbff7e..f4c987bb7e94 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -135,6 +135,14 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, return skb; } + /* If packet is not gso and we are resolving any partial checksum, + * clear encapsulation flag. This allows setting CHECKSUM_PARTIAL + * on the outer header without confusing devices that implement + * NETIF_F_IP_CSUM with encapsulation. + */ + if (csum_help) + skb->encapsulation = 0; + if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) { err = skb_checksum_help(skb); if (unlikely(err)) diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index b92b81718ca4..4e86c59ec7f7 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -57,10 +57,12 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | SKB_GSO_SIT | SKB_GSO_MPLS | SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM | 0) || !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) goto out; @@ -97,9 +99,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, th->check = newcheck; if (skb->ip_summed != CHECKSUM_PARTIAL) - th->check = - csum_fold(csum_partial(skb_transport_header(skb), - thlen, skb->csum)); + th->check = gso_make_checksum(skb, ~th->check); seq += mss; if (copy_destructor) { @@ -133,8 +133,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, th->check = ~csum_fold((__force __wsum)((__force u32)th->check + (__force u32)delta)); if (skb->ip_summed != CHECKSUM_PARTIAL) - th->check = csum_fold(csum_partial(skb_transport_header(skb), - thlen, skb->csum)); + th->check = gso_make_checksum(skb, ~th->check); out: return segs; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e07d52b8617a..8d8c33d84c9a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -762,6 +762,43 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst) } EXPORT_SYMBOL_GPL(udp4_hwcsum); +/* Function to set UDP checksum for an IPv4 UDP packet. This is intended + * for the simple case like when setting the checksum for a UDP tunnel. + */ +void udp_set_csum(bool nocheck, struct sk_buff *skb, + __be32 saddr, __be32 daddr, int len) +{ + struct udphdr *uh = udp_hdr(skb); + + if (nocheck) + uh->check = 0; + else if (skb_is_gso(skb)) + uh->check = ~udp_v4_check(len, saddr, daddr, 0); + else if (skb_dst(skb) && skb_dst(skb)->dev && + (skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) { + + BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); + + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + uh->check = ~udp_v4_check(len, saddr, daddr, 0); + } else { + __wsum csum; + + BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); + + uh->check = 0; + csum = skb_checksum(skb, 0, len, 0); + uh->check = udp_v4_check(len, saddr, daddr, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + } +} +EXPORT_SYMBOL(udp_set_csum); + static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) { struct sock *sk = skb->sk; @@ -2491,7 +2528,11 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); __be16 protocol = skb->protocol; netdev_features_t enc_features; - int outer_hlen; + int udp_offset, outer_hlen; + unsigned int oldlen; + bool need_csum; + + oldlen = (u16)~skb->len; if (unlikely(!pskb_may_pull(skb, tnl_hlen))) goto out; @@ -2503,6 +2544,10 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, skb->mac_len = skb_inner_network_offset(skb); skb->protocol = htons(ETH_P_TEB); + need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM); + if (need_csum) + skb->encap_hdr_csum = 1; + /* segment inner packet. */ enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); segs = skb_mac_gso_segment(skb, enc_features); @@ -2513,10 +2558,11 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, } outer_hlen = skb_tnl_header_len(skb); + udp_offset = outer_hlen - tnl_hlen; skb = segs; do { struct udphdr *uh; - int udp_offset = outer_hlen - tnl_hlen; + int len; skb_reset_inner_headers(skb); skb->encapsulation = 1; @@ -2527,31 +2573,20 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, skb_reset_mac_header(skb); skb_set_network_header(skb, mac_len); skb_set_transport_header(skb, udp_offset); + len = skb->len - udp_offset; uh = udp_hdr(skb); - uh->len = htons(skb->len - udp_offset); + uh->len = htons(len); - /* csum segment if tunnel sets skb with csum. */ - if (protocol == htons(ETH_P_IP) && unlikely(uh->check)) { - struct iphdr *iph = ip_hdr(skb); + if (need_csum) { + __be32 delta = htonl(oldlen + len); + + uh->check = ~csum_fold((__force __wsum) + ((__force u32)uh->check + + (__force u32)delta)); + uh->check = gso_make_checksum(skb, ~uh->check); - uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - skb->len - udp_offset, - IPPROTO_UDP, 0); - uh->check = csum_fold(skb_checksum(skb, udp_offset, - skb->len - udp_offset, 0)); if (uh->check == 0) uh->check = CSUM_MANGLED_0; - - } else if (protocol == htons(ETH_P_IPV6)) { - struct ipv6hdr *ipv6h = ipv6_hdr(skb); - u32 len = skb->len - udp_offset; - - uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, - len, IPPROTO_UDP, 0); - uh->check = csum_fold(skb_checksum(skb, udp_offset, len, 0)); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - skb->ip_summed = CHECKSUM_NONE; } skb->protocol = protocol; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 88b4023ecfcf..7b1840110173 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -56,7 +56,8 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, __wsum csum; if (skb->encapsulation && - skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) { + (skb_shinfo(skb)->gso_type & + (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) { segs = skb_udp_tunnel_segment(skb, features); goto out; } @@ -71,8 +72,10 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_IPIP | - SKB_GSO_GRE | SKB_GSO_MPLS) || + SKB_GSO_GRE | SKB_GSO_GRE_CSUM | + SKB_GSO_MPLS) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index da26224a5993..9a4d7322fb22 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -84,3 +84,41 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) ip6_compute_pseudo); } EXPORT_SYMBOL(udp6_csum_init); + +/* Function to set UDP checksum for an IPv6 UDP packet. This is intended + * for the simple case like when setting the checksum for a UDP tunnel. + */ +void udp6_set_csum(bool nocheck, struct sk_buff *skb, + const struct in6_addr *saddr, + const struct in6_addr *daddr, int len) +{ + struct udphdr *uh = udp_hdr(skb); + + if (nocheck) + uh->check = 0; + else if (skb_is_gso(skb)) + uh->check = ~udp_v6_check(len, saddr, daddr, 0); + else if (skb_dst(skb) && skb_dst(skb)->dev && + (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) { + + BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); + + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + uh->check = ~udp_v6_check(len, saddr, daddr, 0); + } else { + __wsum csum; + + BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); + + uh->check = 0; + csum = skb_checksum(skb, 0, len, 0); + uh->check = udp_v6_check(len, saddr, daddr, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + } +} +EXPORT_SYMBOL(udp6_set_csum); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index b2f091566f88..65eda2a8af48 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -97,9 +97,11 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | SKB_GSO_SIT | SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_MPLS | SKB_GSO_TCPV6 | 0))) diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index b261ee8b83fc..0ae3d98f83e0 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -63,7 +63,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | SKB_GSO_SIT | SKB_GSO_MPLS) || @@ -76,7 +78,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, goto out; } - if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) + if (skb->encapsulation && skb_shinfo(skb)->gso_type & + (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)) segs = skb_udp_tunnel_segment(skb, features); else { /* Do software UFO. Complete and fill in the UDP checksum as HW cannot diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 379558014b60..bea259043205 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1095,33 +1095,6 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, return 0; } -#if IS_ENABLED(CONFIG_IPV6) -static void l2tp_xmit_ipv6_csum(struct sock *sk, struct sk_buff *skb, - int udp_len) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - struct udphdr *uh = udp_hdr(skb); - - if (udp_get_no_check6_tx(sk)) - skb->ip_summed = CHECKSUM_NONE; - else if (!skb_dst(skb) || !skb_dst(skb)->dev || - !(skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) { - __wsum csum = skb_checksum(skb, 0, udp_len, 0); - skb->ip_summed = CHECKSUM_UNNECESSARY; - uh->check = csum_ipv6_magic(&np->saddr, &sk->sk_v6_daddr, udp_len, - IPPROTO_UDP, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - } else { - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - uh->check = ~csum_ipv6_magic(&np->saddr, &sk->sk_v6_daddr, - udp_len, IPPROTO_UDP, 0); - } -} -#endif - /* If caller requires the skb to have a ppp header, the header must be * inserted in the skb data before calling this function. */ @@ -1133,7 +1106,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len struct flowi *fl; struct udphdr *uh; struct inet_sock *inet; - __wsum csum; int headroom; int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0; int udp_len; @@ -1182,33 +1154,17 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len uh->dest = inet->inet_dport; udp_len = uhlen + hdr_len + data_len; uh->len = htons(udp_len); - uh->check = 0; /* Calculate UDP checksum if configured to do so */ #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) - l2tp_xmit_ipv6_csum(sk, skb, udp_len); + udp6_set_csum(udp_get_no_check6_tx(sk), + skb, &inet6_sk(sk)->saddr, + &sk->sk_v6_daddr, udp_len); else #endif - if (sk->sk_no_check_tx) - skb->ip_summed = CHECKSUM_NONE; - else if ((skb_dst(skb) && skb_dst(skb)->dev) && - (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) { - skb->ip_summed = CHECKSUM_COMPLETE; - csum = skb_checksum(skb, 0, udp_len, 0); - uh->check = csum_tcpudp_magic(inet->inet_saddr, - inet->inet_daddr, - udp_len, IPPROTO_UDP, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - } else { - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - uh->check = ~csum_tcpudp_magic(inet->inet_saddr, - inet->inet_daddr, - udp_len, IPPROTO_UDP, 0); - } + udp_set_csum(sk->sk_no_check_tx, skb, inet->inet_saddr, + inet->inet_daddr, udp_len); break; case L2TP_ENCAPTYPE_IP: diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index 851cd880b0c0..6b38d083e1c9 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -33,6 +33,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | SKB_GSO_MPLS))) goto out; diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index a93efa3f64c3..0edbd95c60e7 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -122,7 +122,7 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) vxlan_port = vxlan_vport(vport); strncpy(vxlan_port->name, parms->name, IFNAMSIZ); - vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, false); + vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 0); if (IS_ERR(vs)) { ovs_vport_free(vport); return (void *)vs;