From b0e010c527de742cb18e8d50a06dfd1a995b5382 Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Wed, 26 Aug 2015 09:40:28 -0700 Subject: [PATCH 01/30] ipvs: replace ip_vs_fill_ip4hdr with ip_vs_fill_iph_skb_off This removes some duplicated code and makes the ICMPv6 path look more like the ICMP path. Signed-off-by: Alex Gartrell Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 49 +++++++++++++---------- net/netfilter/ipvs/ip_vs_core.c | 70 ++++++++++++--------------------- net/netfilter/ipvs/ip_vs_xmit.c | 15 +++---- 3 files changed, 63 insertions(+), 71 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 9b9ca87a4210..0c8309f34982 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -104,6 +104,7 @@ static inline struct net *seq_file_single_net(struct seq_file *seq) extern int ip_vs_conn_tab_size; struct ip_vs_iphdr { + __u32 off; /* Where IP or IPv4 header starts */ __u32 len; /* IPv4 simply where L4 starts * IPv6 where L4 Transport Header starts */ __u16 fragoffs; /* IPv6 fragment offset, 0 if first frag (or not frag)*/ @@ -120,48 +121,56 @@ static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset, return skb_header_pointer(skb, offset, len, buffer); } -static inline void -ip_vs_fill_ip4hdr(const void *nh, struct ip_vs_iphdr *iphdr) -{ - const struct iphdr *iph = nh; - - iphdr->len = iph->ihl * 4; - iphdr->fragoffs = 0; - iphdr->protocol = iph->protocol; - iphdr->saddr.ip = iph->saddr; - iphdr->daddr.ip = iph->daddr; -} - /* This function handles filling *ip_vs_iphdr, both for IPv4 and IPv6. * IPv6 requires some extra work, as finding proper header position, * depend on the IPv6 extension headers. */ -static inline void -ip_vs_fill_iph_skb(int af, const struct sk_buff *skb, struct ip_vs_iphdr *iphdr) +static inline int +ip_vs_fill_iph_skb_off(int af, const struct sk_buff *skb, int offset, + struct ip_vs_iphdr *iphdr) { + iphdr->off = offset; #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - const struct ipv6hdr *iph = - (struct ipv6hdr *)skb_network_header(skb); + struct ipv6hdr _iph; + const struct ipv6hdr *iph = skb_header_pointer( + skb, offset, sizeof(_iph), &_iph); + if (!iph) + return 0; + iphdr->saddr.in6 = iph->saddr; iphdr->daddr.in6 = iph->daddr; /* ipv6_find_hdr() updates len, flags */ - iphdr->len = 0; + iphdr->len = offset; iphdr->flags = 0; iphdr->protocol = ipv6_find_hdr(skb, &iphdr->len, -1, &iphdr->fragoffs, &iphdr->flags); + if (iphdr->protocol < 0) + return 0; } else #endif { - const struct iphdr *iph = - (struct iphdr *)skb_network_header(skb); - iphdr->len = iph->ihl * 4; + struct iphdr _iph; + const struct iphdr *iph = skb_header_pointer( + skb, offset, sizeof(_iph), &_iph); + if (!iph) + return 0; + + iphdr->len = offset + iph->ihl * 4; iphdr->fragoffs = 0; iphdr->protocol = iph->protocol; iphdr->saddr.ip = iph->saddr; iphdr->daddr.ip = iph->daddr; } + + return 1; +} + +static inline int +ip_vs_fill_iph_skb(int af, const struct sk_buff *skb, struct ip_vs_iphdr *iphdr) +{ + return ip_vs_fill_iph_skb_off(af, skb, skb_network_offset(skb), iphdr); } static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst, diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 38fbc194b9cb..b831fe84bff7 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -436,7 +436,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, * with persistence the connection is created on SYN+ACK. */ if (pptr[0] == FTPDATA) { - IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, + IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off, "Not scheduling FTPDATA"); return NULL; } @@ -446,7 +446,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, */ if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && (cp = pp->conn_in_get(svc->af, skb, iph, 1))) { - IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, + IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off, "Not scheduling reply for existing connection"); __ip_vs_conn_put(cp); return NULL; @@ -934,8 +934,8 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset, "Checking outgoing ICMP for"); - ip_vs_fill_ip4hdr(cih, &ciph); - ciph.len += offset; + ip_vs_fill_iph_skb_off(AF_INET, skb, offset, &ciph); + /* The embedded headers contain source and dest in reverse order */ cp = pp->conn_out_get(AF_INET, skb, &ciph, 1); if (!cp) @@ -951,12 +951,11 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum, struct ip_vs_iphdr *ipvsh) { struct icmp6hdr _icmph, *ic; - struct ipv6hdr _ip6h, *ip6h; /* The ip header contained within ICMP */ struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */ struct ip_vs_conn *cp; struct ip_vs_protocol *pp; union nf_inet_addr snet; - unsigned int writable; + unsigned int offset; *related = 1; ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph, ipvsh); @@ -984,17 +983,9 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, ic->icmp6_type, ntohs(icmpv6_id(ic)), &ipvsh->saddr, &ipvsh->daddr); - /* Now find the contained IP header */ - ciph.len = ipvsh->len + sizeof(_icmph); - ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h); - if (ip6h == NULL) + if (!ip_vs_fill_iph_skb_off(AF_INET6, skb, ipvsh->len + sizeof(_icmph), + &ciph)) return NF_ACCEPT; /* The packet looks wrong, ignore */ - ciph.saddr.in6 = ip6h->saddr; /* conn_out_get() handles reverse order */ - ciph.daddr.in6 = ip6h->daddr; - /* skip possible IPv6 exthdrs of contained IPv6 packet */ - ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL); - if (ciph.protocol < 0) - return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */ pp = ip_vs_proto_get(ciph.protocol); if (!pp) @@ -1006,9 +997,9 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, return NF_ACCEPT; snet.in6 = ciph.saddr.in6; - writable = ciph.len; + offset = ciph.len; return handle_response_icmp(AF_INET6, skb, &snet, ciph.protocol, cp, - pp, writable, sizeof(struct ipv6hdr), + pp, offset, sizeof(struct ipv6hdr), hooknum); } #endif @@ -1093,7 +1084,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, { struct ip_vs_protocol *pp = pd->pp; - IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); + IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet"); if (!skb_make_writable(skb, iph->len)) goto drop; @@ -1130,7 +1121,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (ip_vs_route_me_harder(af, skb, hooknum)) goto drop; - IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); + IP_VS_DBG_PKT(10, af, pp, skb, iph->off, "After SNAT"); ip_vs_out_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd); @@ -1221,7 +1212,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) ip_vs_defrag_user(hooknum))) return NF_STOLEN; - ip_vs_fill_ip4hdr(skb_network_header(skb), &iph); + ip_vs_fill_iph_skb(AF_INET, skb, &iph); } /* @@ -1272,7 +1263,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) } } } - IP_VS_DBG_PKT(12, af, pp, skb, 0, + IP_VS_DBG_PKT(12, af, pp, skb, iph.off, "ip_vs_out: packet continues traversal as normal"); return NF_ACCEPT; } @@ -1416,9 +1407,9 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) "Checking incoming ICMP for"); offset2 = offset; - ip_vs_fill_ip4hdr(cih, &ciph); - ciph.len += offset; + ip_vs_fill_iph_skb_off(AF_INET, skb, offset, &ciph); offset = ciph.len; + /* The embedded headers contain source and dest in reverse order. * For IPIP this is error for request, not for reply. */ @@ -1511,13 +1502,12 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum, struct ip_vs_iphdr *iph) { struct net *net = NULL; - struct ipv6hdr _ip6h, *ip6h; struct icmp6hdr _icmph, *ic; struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */ struct ip_vs_conn *cp; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; - unsigned int offs_ciph, writable, verdict; + unsigned int offset, verdict; *related = 1; @@ -1546,18 +1536,9 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, ic->icmp6_type, ntohs(icmpv6_id(ic)), &iph->saddr, &iph->daddr); - /* Now find the contained IP header */ - ciph.len = iph->len + sizeof(_icmph); - offs_ciph = ciph.len; /* Save ip header offset */ - ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h); - if (ip6h == NULL) - return NF_ACCEPT; /* The packet looks wrong, ignore */ - ciph.saddr.in6 = ip6h->saddr; /* conn_in_get() handles reverse order */ - ciph.daddr.in6 = ip6h->daddr; - /* skip possible IPv6 exthdrs of contained IPv6 packet */ - ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL); - if (ciph.protocol < 0) - return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */ + offset = iph->len + sizeof(_icmph); + if (!ip_vs_fill_iph_skb_off(AF_INET6, skb, offset, &ciph)) + return NF_ACCEPT; net = skb_net(skb); pd = ip_vs_proto_data_get(net, ciph.protocol); @@ -1569,7 +1550,7 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, if (ciph.fragoffs) return NF_ACCEPT; - IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offs_ciph, + IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset, "Checking incoming ICMPv6 for"); /* The embedded headers contain source and dest in reverse order @@ -1591,12 +1572,12 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, ip_vs_in_stats(cp, skb); /* Need to mangle contained IPv6 header in ICMPv6 packet */ - writable = ciph.len; + offset = ciph.len; if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol || IPPROTO_SCTP == ciph.protocol) - writable += 2 * sizeof(__u16); /* Also mangle ports */ + offset += 2 * sizeof(__u16); /* Also mangle ports */ - verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, writable, hooknum, &ciph); + verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset, hooknum, &ciph); __ip_vs_conn_put(cp); @@ -1720,12 +1701,13 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) * is missing module nf_defrag_ipv6 */ IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n"); - IP_VS_DBG_PKT(7, af, pp, skb, 0, "unhandled fragment"); + IP_VS_DBG_PKT(7, af, pp, skb, iph.off, + "unhandled fragment"); } return NF_ACCEPT; } - IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); + IP_VS_DBG_PKT(11, af, pp, skb, iph.off, "Incoming packet"); /* Check the server status */ if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 258a0b0e82a2..9a26f2ea86d9 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -723,7 +723,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct nf_conn *ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct)) { - IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0, + IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, ipvsh->off, "ip_vs_nat_xmit(): " "stopping DNAT to local address"); goto tx_error; @@ -733,8 +733,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* From world but DNAT to loopback address? */ if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) { - IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " - "stopping DNAT to loopback address"); + IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, ipvsh->off, + "ip_vs_nat_xmit(): stopping DNAT to loopback " + "address"); goto tx_error; } @@ -751,7 +752,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_hdr(skb)->daddr = cp->daddr.ip; ip_send_check(ip_hdr(skb)); - IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT"); + IP_VS_DBG_PKT(10, AF_INET, pp, skb, ipvsh->off, "After DNAT"); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still @@ -812,7 +813,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct nf_conn *ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct)) { - IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0, + IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, ipvsh->off, "ip_vs_nat_xmit_v6(): " "stopping DNAT to local address"); goto tx_error; @@ -823,7 +824,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* From world but DNAT to loopback address? */ if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) { - IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0, + IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, ipvsh->off, "ip_vs_nat_xmit_v6(): " "stopping DNAT to loopback address"); goto tx_error; @@ -841,7 +842,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error; ipv6_hdr(skb)->daddr = cp->daddr.in6; - IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT"); + IP_VS_DBG_PKT(10, AF_INET6, pp, skb, ipvsh->off, "After DNAT"); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still From 4fd9beef37f3a14aa34cb025479a0e431dc9e58b Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Wed, 26 Aug 2015 09:40:29 -0700 Subject: [PATCH 02/30] ipvs: Add hdr_flags to iphdr These flags contain information like whether or not the addresses are inverted or from icmp. The first will allow us to drop an inverse param all over the place, and the second will later be useful in scheduling icmp. Signed-off-by: Alex Gartrell Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 43 ++++++++++++++++++++++++++++--- net/netfilter/ipvs/ip_vs_core.c | 20 +++++++------- net/netfilter/ipvs/ip_vs_pe_sip.c | 2 +- net/netfilter/xt_ipvs.c | 2 +- 4 files changed, 52 insertions(+), 15 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 0c8309f34982..ac336a79ad3d 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -29,6 +29,9 @@ #endif #include /* Netw namespace */ +#define IP_VS_HDR_INVERSE 1 +#define IP_VS_HDR_ICMP 2 + /* Generic access of ipvs struct */ static inline struct netns_ipvs *net_ipvs(struct net* net) { @@ -104,6 +107,7 @@ static inline struct net *seq_file_single_net(struct seq_file *seq) extern int ip_vs_conn_tab_size; struct ip_vs_iphdr { + int hdr_flags; /* ipvs flags */ __u32 off; /* Where IP or IPv4 header starts */ __u32 len; /* IPv4 simply where L4 starts * IPv6 where L4 Transport Header starts */ @@ -127,9 +131,11 @@ static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset, */ static inline int ip_vs_fill_iph_skb_off(int af, const struct sk_buff *skb, int offset, - struct ip_vs_iphdr *iphdr) + int hdr_flags, struct ip_vs_iphdr *iphdr) { + iphdr->hdr_flags = hdr_flags; iphdr->off = offset; + #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { struct ipv6hdr _iph; @@ -168,9 +174,40 @@ ip_vs_fill_iph_skb_off(int af, const struct sk_buff *skb, int offset, } static inline int -ip_vs_fill_iph_skb(int af, const struct sk_buff *skb, struct ip_vs_iphdr *iphdr) +ip_vs_fill_iph_skb_icmp(int af, const struct sk_buff *skb, int offset, + bool inverse, struct ip_vs_iphdr *iphdr) { - return ip_vs_fill_iph_skb_off(af, skb, skb_network_offset(skb), iphdr); + int hdr_flags = IP_VS_HDR_ICMP; + + if (inverse) + hdr_flags |= IP_VS_HDR_INVERSE; + + return ip_vs_fill_iph_skb_off(af, skb, offset, hdr_flags, iphdr); +} + +static inline int +ip_vs_fill_iph_skb(int af, const struct sk_buff *skb, bool inverse, + struct ip_vs_iphdr *iphdr) +{ + int hdr_flags = 0; + + if (inverse) + hdr_flags |= IP_VS_HDR_INVERSE; + + return ip_vs_fill_iph_skb_off(af, skb, skb_network_offset(skb), + hdr_flags, iphdr); +} + +static inline bool +ip_vs_iph_inverse(const struct ip_vs_iphdr *iph) +{ + return !!(iph->hdr_flags & IP_VS_HDR_INVERSE); +} + +static inline bool +ip_vs_iph_icmp(const struct ip_vs_iphdr *iph) +{ + return !!(iph->hdr_flags & IP_VS_HDR_ICMP); } static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst, diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index b831fe84bff7..4f5d3d174517 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -934,7 +934,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset, "Checking outgoing ICMP for"); - ip_vs_fill_iph_skb_off(AF_INET, skb, offset, &ciph); + ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, true, &ciph); /* The embedded headers contain source and dest in reverse order */ cp = pp->conn_out_get(AF_INET, skb, &ciph, 1); @@ -983,8 +983,8 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, ic->icmp6_type, ntohs(icmpv6_id(ic)), &ipvsh->saddr, &ipvsh->daddr); - if (!ip_vs_fill_iph_skb_off(AF_INET6, skb, ipvsh->len + sizeof(_icmph), - &ciph)) + if (!ip_vs_fill_iph_skb_icmp(AF_INET6, skb, ipvsh->len + sizeof(_icmph), + true, &ciph)) return NF_ACCEPT; /* The packet looks wrong, ignore */ pp = ip_vs_proto_get(ciph.protocol); @@ -1177,7 +1177,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) if (!net_ipvs(net)->enable) return NF_ACCEPT; - ip_vs_fill_iph_skb(af, skb, &iph); + ip_vs_fill_iph_skb(af, skb, false, &iph); #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { @@ -1212,7 +1212,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) ip_vs_defrag_user(hooknum))) return NF_STOLEN; - ip_vs_fill_iph_skb(AF_INET, skb, &iph); + ip_vs_fill_iph_skb(AF_INET, skb, false, &iph); } /* @@ -1407,7 +1407,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) "Checking incoming ICMP for"); offset2 = offset; - ip_vs_fill_iph_skb_off(AF_INET, skb, offset, &ciph); + ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, !ipip, &ciph); offset = ciph.len; /* The embedded headers contain source and dest in reverse order. @@ -1537,7 +1537,7 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, &iph->saddr, &iph->daddr); offset = iph->len + sizeof(_icmph); - if (!ip_vs_fill_iph_skb_off(AF_INET6, skb, offset, &ciph)) + if (!ip_vs_fill_iph_skb_icmp(AF_INET6, skb, offset, true, &ciph)) return NF_ACCEPT; net = skb_net(skb); @@ -1614,7 +1614,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) if (unlikely((skb->pkt_type != PACKET_HOST && hooknum != NF_INET_LOCAL_OUT) || !skb_dst(skb))) { - ip_vs_fill_iph_skb(af, skb, &iph); + ip_vs_fill_iph_skb(af, skb, false, &iph); IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s" " ignored in hook %u\n", skb->pkt_type, iph.protocol, @@ -1627,7 +1627,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - ip_vs_fill_iph_skb(af, skb, &iph); + ip_vs_fill_iph_skb(af, skb, false, &iph); /* Bad... Do not break raw sockets */ if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT && @@ -1841,7 +1841,7 @@ ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb, struct netns_ipvs *ipvs; struct ip_vs_iphdr iphdr; - ip_vs_fill_iph_skb(AF_INET6, skb, &iphdr); + ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr); if (iphdr.protocol != IPPROTO_ICMPV6) return NF_ACCEPT; diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index bed5f7042529..1b8d594e493a 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -70,7 +70,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) const char *dptr; int retc; - ip_vs_fill_iph_skb(p->af, skb, &iph); + ip_vs_fill_iph_skb(p->af, skb, false, &iph); /* Only useful with UDP */ if (iph.protocol != IPPROTO_UDP) diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c index 8d47c3780fda..370462572d84 100644 --- a/net/netfilter/xt_ipvs.c +++ b/net/netfilter/xt_ipvs.c @@ -67,7 +67,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) goto out; } - ip_vs_fill_iph_skb(family, skb, &iph); + ip_vs_fill_iph_skb(family, skb, true, &iph); if (data->bitmask & XT_IPVS_PROTO) if ((iph.protocol == data->l4proto) ^ From 0b72902120d89153996f411bb3ec8ca7b4baf1d8 Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Wed, 26 Aug 2015 09:40:30 -0700 Subject: [PATCH 03/30] ipvs: Handle inverse and icmp headers in ip_vs_leave Signed-off-by: Alex Gartrell Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 4f5d3d174517..bb998d432ce7 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -519,6 +519,17 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, return cp; } +#ifdef CONFIG_SYSCTL +static inline int ip_vs_addr_is_unicast(struct net *net, int af, + union nf_inet_addr *addr) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + return ipv6_addr_type(&addr->in6) & IPV6_ADDR_UNICAST; +#endif + return (inet_addr_type(net, addr->ip) == RTN_UNICAST); +} +#endif /* * Pass or drop the packet. @@ -528,33 +539,28 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph) { - __be16 _ports[2], *pptr; + __be16 _ports[2], *pptr, dport; #ifdef CONFIG_SYSCTL struct net *net; struct netns_ipvs *ipvs; - int unicast; #endif pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); - if (pptr == NULL) { + if (!pptr) return NF_DROP; - } + dport = likely(!ip_vs_iph_inverse(iph)) ? pptr[1] : pptr[0]; #ifdef CONFIG_SYSCTL net = skb_net(skb); -#ifdef CONFIG_IP_VS_IPV6 - if (svc->af == AF_INET6) - unicast = ipv6_addr_type(&iph->daddr.in6) & IPV6_ADDR_UNICAST; - else -#endif - unicast = (inet_addr_type(net, iph->daddr.ip) == RTN_UNICAST); /* if it is fwmark-based service, the cache_bypass sysctl is up and the destination is a non-local unicast, then create a cache_bypass connection entry */ ipvs = net_ipvs(net); - if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) { + if (ipvs->sysctl_cache_bypass && svc->fwmark && + !(iph->hdr_flags & (IP_VS_HDR_INVERSE | IP_VS_HDR_ICMP)) && + ip_vs_addr_is_unicast(net, svc->af, &iph->daddr)) { int ret; struct ip_vs_conn *cp; unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && @@ -598,9 +604,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, * listed in the ipvs table), pass the packets, because it is * not ipvs job to decide to drop the packets. */ - if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) + if (svc->port == FTPPORT && dport != FTPPORT) return NF_ACCEPT; + if (unlikely(ip_vs_iph_icmp(iph))) + return NF_DROP; + /* * Notify the client that the destination is unreachable, and * release the socket buffer. From 3b5ca61768457de5139229392d0333165abcf10f Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Wed, 26 Aug 2015 09:40:31 -0700 Subject: [PATCH 04/30] ipvs: pull out ip_vs_try_to_schedule function This is necessary as we'll be trying to schedule icmp later and we'll want to share this code. Signed-off-by: Alex Gartrell Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 60 +++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index bb998d432ce7..2c44e34314c2 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1327,6 +1327,42 @@ ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, #endif +static unsigned int +ip_vs_try_to_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, + int *verdict, struct ip_vs_conn **cpp, + struct ip_vs_iphdr *iph) +{ + struct ip_vs_protocol *pp = pd->pp; + + if (!iph->fragoffs) { + /* No (second) fragments need to enter here, as nf_defrag_ipv6 + * replayed fragment zero will already have created the cp + */ + + /* Schedule and create new connection entry into cpp */ + if (!pp->conn_schedule(af, skb, pd, verdict, cpp, iph)) + return 0; + } + + if (unlikely(!*cpp)) { + /* sorry, all this trouble for a no-hit :) */ + IP_VS_DBG_PKT(12, af, pp, skb, iph->off, + "ip_vs_in: packet continues traversal as normal"); + if (iph->fragoffs) { + /* Fragment that couldn't be mapped to a conn entry + * is missing module nf_defrag_ipv6 + */ + IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n"); + IP_VS_DBG_PKT(7, af, pp, skb, iph->off, + "unhandled fragment"); + } + *verdict = NF_ACCEPT; + return 0; + } + + return 1; +} + /* * Handle ICMP messages in the outside-to-inside direction (incoming). * Find any that might be relevant, check against existing connections, @@ -1690,33 +1726,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) cp = NULL; } - if (unlikely(!cp) && !iph.fragoffs) { - /* No (second) fragments need to enter here, as nf_defrag_ipv6 - * replayed fragment zero will already have created the cp - */ + if (unlikely(!cp)) { int v; - /* Schedule and create new connection entry into &cp */ - if (!pp->conn_schedule(af, skb, pd, &v, &cp, &iph)) + if (!ip_vs_try_to_schedule(af, skb, pd, &v, &cp, &iph)) return v; } - if (unlikely(!cp)) { - /* sorry, all this trouble for a no-hit :) */ - IP_VS_DBG_PKT(12, af, pp, skb, 0, - "ip_vs_in: packet continues traversal as normal"); - if (iph.fragoffs) { - /* Fragment that couldn't be mapped to a conn entry - * is missing module nf_defrag_ipv6 - */ - IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n"); - IP_VS_DBG_PKT(7, af, pp, skb, iph.off, - "unhandled fragment"); - } - return NF_ACCEPT; - } - IP_VS_DBG_PKT(11, af, pp, skb, iph.off, "Incoming packet"); + /* Check the server status */ if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ From 802c41adcf3be63f351c302c9665865d705cada9 Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Wed, 26 Aug 2015 09:40:32 -0700 Subject: [PATCH 05/30] ipvs: drop inverse argument to conn_{in,out}_get No longer necessary since the information is included in the ip_vs_iphdr itself. Signed-off-by: Alex Gartrell Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 12 ++++------ net/netfilter/ipvs/ip_vs_conn.c | 12 +++++----- net/netfilter/ipvs/ip_vs_core.c | 31 ++++++++++++++----------- net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 17 +++++++------- net/netfilter/xt_ipvs.c | 2 +- 5 files changed, 37 insertions(+), 37 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index ac336a79ad3d..ba90729d1111 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -495,14 +495,12 @@ struct ip_vs_protocol { struct ip_vs_conn * (*conn_in_get)(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, - int inverse); + const struct ip_vs_iphdr *iph); struct ip_vs_conn * (*conn_out_get)(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, - int inverse); + const struct ip_vs_iphdr *iph); int (*snat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_iphdr *iph); @@ -1232,14 +1230,12 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p); struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p); struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, - int inverse); + const struct ip_vs_iphdr *iph); struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p); struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, - int inverse); + const struct ip_vs_iphdr *iph); /* Get reference to gain full access to conn. * By default, RCU read-side critical sections have access only to diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index b0f7b626b56d..f71b3146a5a1 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -316,7 +316,7 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p) static int ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, const struct ip_vs_iphdr *iph, - int inverse, struct ip_vs_conn_param *p) + struct ip_vs_conn_param *p) { __be16 _ports[2], *pptr; struct net *net = skb_net(skb); @@ -325,7 +325,7 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, if (pptr == NULL) return 1; - if (likely(!inverse)) + if (likely(!ip_vs_iph_inverse(iph))) ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr, pptr[0], &iph->daddr, pptr[1], p); else @@ -336,11 +336,11 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, int inverse) + const struct ip_vs_iphdr *iph) { struct ip_vs_conn_param p; - if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p)) + if (ip_vs_conn_fill_param_proto(af, skb, iph, &p)) return NULL; return ip_vs_conn_in_get(&p); @@ -440,11 +440,11 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, int inverse) + const struct ip_vs_iphdr *iph) { struct ip_vs_conn_param p; - if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p)) + if (ip_vs_conn_fill_param_proto(af, skb, iph, &p)) return NULL; return ip_vs_conn_out_get(&p); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 2c44e34314c2..ebfb371daa3b 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -444,12 +444,18 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, /* * Do not schedule replies from local real server. */ - if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && - (cp = pp->conn_in_get(svc->af, skb, iph, 1))) { - IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off, - "Not scheduling reply for existing connection"); - __ip_vs_conn_put(cp); - return NULL; + if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK)) { + iph->hdr_flags ^= IP_VS_HDR_INVERSE; + cp = pp->conn_in_get(svc->af, skb, iph); + iph->hdr_flags ^= IP_VS_HDR_INVERSE; + + if (cp) { + IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off, + "Not scheduling reply for existing" + " connection"); + __ip_vs_conn_put(cp); + return NULL; + } } /* @@ -946,7 +952,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, true, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(AF_INET, skb, &ciph, 1); + cp = pp->conn_out_get(AF_INET, skb, &ciph); if (!cp) return NF_ACCEPT; @@ -1001,7 +1007,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, return NF_ACCEPT; /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(AF_INET6, skb, &ciph, 1); + cp = pp->conn_out_get(AF_INET6, skb, &ciph); if (!cp) return NF_ACCEPT; @@ -1227,7 +1233,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) /* * Check if the packet belongs to an existing entry */ - cp = pp->conn_out_get(af, skb, &iph, 0); + cp = pp->conn_out_get(af, skb, &iph); if (likely(cp)) return handle_response(af, skb, pd, cp, &iph, hooknum); @@ -1458,7 +1464,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) /* The embedded headers contain source and dest in reverse order. * For IPIP this is error for request, not for reply. */ - cp = pp->conn_in_get(AF_INET, skb, &ciph, ipip ? 0 : 1); + cp = pp->conn_in_get(AF_INET, skb, &ciph); if (!cp) return NF_ACCEPT; @@ -1601,8 +1607,7 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, /* The embedded headers contain source and dest in reverse order * if not from localhost */ - cp = pp->conn_in_get(AF_INET6, skb, &ciph, - (hooknum == NF_INET_LOCAL_OUT) ? 0 : 1); + cp = pp->conn_in_get(AF_INET6, skb, &ciph); if (!cp) return NF_ACCEPT; @@ -1712,7 +1717,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) /* * Check if the packet belongs to an existing connection entry */ - cp = pp->conn_in_get(af, skb, &iph, 0); + cp = pp->conn_in_get(af, skb, &iph); conn_reuse_mode = sysctl_conn_reuse_mode(ipvs); if (conn_reuse_mode && !iph.fragoffs && diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index 5de3dd312c0f..be1791d1c03f 100644 --- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c @@ -42,10 +42,10 @@ struct isakmp_hdr { static void ah_esp_conn_fill_param_proto(struct net *net, int af, - const struct ip_vs_iphdr *iph, int inverse, + const struct ip_vs_iphdr *iph, struct ip_vs_conn_param *p) { - if (likely(!inverse)) + if (likely(!ip_vs_iph_inverse(iph))) ip_vs_conn_fill_param(net, af, IPPROTO_UDP, &iph->saddr, htons(PORT_ISAKMP), &iph->daddr, htons(PORT_ISAKMP), p); @@ -57,14 +57,13 @@ ah_esp_conn_fill_param_proto(struct net *net, int af, static struct ip_vs_conn * ah_esp_conn_in_get(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, - int inverse) + const struct ip_vs_iphdr *iph) { struct ip_vs_conn *cp; struct ip_vs_conn_param p; struct net *net = skb_net(skb); - ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p); + ah_esp_conn_fill_param_proto(net, af, iph, &p); cp = ip_vs_conn_in_get(&p); if (!cp) { /* @@ -73,7 +72,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, */ IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet " "%s%s %s->%s\n", - inverse ? "ICMP+" : "", + ip_vs_iph_icmp(iph) ? "ICMP+" : "", ip_vs_proto_get(iph->protocol)->name, IP_VS_DBG_ADDR(af, &iph->saddr), IP_VS_DBG_ADDR(af, &iph->daddr)); @@ -85,18 +84,18 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, static struct ip_vs_conn * ah_esp_conn_out_get(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, int inverse) + const struct ip_vs_iphdr *iph) { struct ip_vs_conn *cp; struct ip_vs_conn_param p; struct net *net = skb_net(skb); - ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p); + ah_esp_conn_fill_param_proto(net, af, iph, &p); cp = ip_vs_conn_out_get(&p); if (!cp) { IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " "%s%s %s->%s\n", - inverse ? "ICMP+" : "", + ip_vs_iph_icmp(iph) ? "ICMP+" : "", ip_vs_proto_get(iph->protocol)->name, IP_VS_DBG_ADDR(af, &iph->saddr), IP_VS_DBG_ADDR(af, &iph->daddr)); diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c index 370462572d84..452ba2a3e7ae 100644 --- a/net/netfilter/xt_ipvs.c +++ b/net/netfilter/xt_ipvs.c @@ -85,7 +85,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) /* * Check if the packet belongs to an existing entry */ - cp = pp->conn_out_get(family, skb, &iph, 1 /* inverse */); + cp = pp->conn_out_get(family, skb, &iph); if (unlikely(cp == NULL)) { match = false; goto out; From ee78378f97648834d22ce97e91633ea40f044e3d Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Wed, 26 Aug 2015 09:40:33 -0700 Subject: [PATCH 06/30] ipvs: Make ip_vs_schedule aware of inverse iph'es This is necessary to schedule icmp later. Signed-off-by: Alex Gartrell Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 50 ++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index ebfb371daa3b..6465e7b3e891 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -245,20 +245,30 @@ ip_vs_sched_persist(struct ip_vs_service *svc, const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; union nf_inet_addr snet; /* source network of the client, after masking */ + const union nf_inet_addr *src_addr, *dst_addr; + + if (likely(!ip_vs_iph_inverse(iph))) { + src_addr = &iph->saddr; + dst_addr = &iph->daddr; + } else { + src_addr = &iph->daddr; + dst_addr = &iph->saddr; + } + /* Mask saddr with the netmask to adjust template granularity */ #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) - ipv6_addr_prefix(&snet.in6, &iph->saddr.in6, + ipv6_addr_prefix(&snet.in6, &src_addr->in6, (__force __u32) svc->netmask); else #endif - snet.ip = iph->saddr.ip & svc->netmask; + snet.ip = src_addr->ip & svc->netmask; IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " "mnet %s\n", - IP_VS_DBG_ADDR(svc->af, &iph->saddr), ntohs(src_port), - IP_VS_DBG_ADDR(svc->af, &iph->daddr), ntohs(dst_port), + IP_VS_DBG_ADDR(svc->af, src_addr), ntohs(src_port), + IP_VS_DBG_ADDR(svc->af, dst_addr), ntohs(dst_port), IP_VS_DBG_ADDR(svc->af, &snet)); /* @@ -276,7 +286,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, */ { int protocol = iph->protocol; - const union nf_inet_addr *vaddr = &iph->daddr; + const union nf_inet_addr *vaddr = dst_addr; __be16 vport = 0; if (dst_port == svc->port) { @@ -366,8 +376,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* * Create a new connection according to the template */ - ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr, - src_port, &iph->daddr, dst_port, ¶m); + ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, src_addr, + src_port, dst_addr, dst_port, ¶m); cp = ip_vs_conn_new(¶m, dest->af, &dest->addr, dport, flags, dest, skb->mark); @@ -418,7 +428,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_conn *cp = NULL; struct ip_vs_scheduler *sched; struct ip_vs_dest *dest; - __be16 _ports[2], *pptr; + __be16 _ports[2], *pptr, cport, vport; + const void *caddr, *vaddr; unsigned int flags; *ignored = 1; @@ -429,13 +440,25 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, if (pptr == NULL) return NULL; + if (likely(!ip_vs_iph_inverse(iph))) { + cport = pptr[0]; + caddr = &iph->saddr; + vport = pptr[1]; + vaddr = &iph->daddr; + } else { + cport = pptr[1]; + caddr = &iph->daddr; + vport = pptr[0]; + vaddr = &iph->saddr; + } + /* * FTPDATA needs this check when using local real server. * Never schedule Active FTPDATA connections from real server. * For LVS-NAT they must be already created. For other methods * with persistence the connection is created on SYN+ACK. */ - if (pptr[0] == FTPDATA) { + if (cport == FTPDATA) { IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off, "Not scheduling FTPDATA"); return NULL; @@ -462,7 +485,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, * Persistent service */ if (svc->flags & IP_VS_SVC_F_PERSISTENT) - return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored, + return ip_vs_sched_persist(svc, skb, cport, vport, ignored, iph); *ignored = 0; @@ -470,7 +493,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, /* * Non-persistent service */ - if (!svc->fwmark && pptr[1] != svc->port) { + if (!svc->fwmark && vport != svc->port) { if (!svc->port) pr_err("Schedule: port zero only supported " "in persistent services, " @@ -502,10 +525,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_conn_param p; ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, - &iph->saddr, pptr[0], &iph->daddr, - pptr[1], &p); + caddr, cport, vaddr, vport, &p); cp = ip_vs_conn_new(&p, dest->af, &dest->addr, - dest->port ? dest->port : pptr[1], + dest->port ? dest->port : vport, flags, dest, skb->mark); if (!cp) { *ignored = -1; From 94485fedcb176a9105961e47bd9dda23801c1906 Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Wed, 26 Aug 2015 09:40:34 -0700 Subject: [PATCH 07/30] ipvs: add schedule_icmp sysctl This sysctl will be used to enable the scheduling of icmp packets. Signed-off-by: Alex Gartrell Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 11 +++++++++++ net/netfilter/ipvs/ip_vs_ctl.c | 8 +++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index ba90729d1111..47677f0493c7 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -997,6 +997,7 @@ struct netns_ipvs { int sysctl_pmtu_disc; int sysctl_backup_only; int sysctl_conn_reuse_mode; + int sysctl_schedule_icmp; /* ip_vs_lblc */ int sysctl_lblc_expiration; @@ -1115,6 +1116,11 @@ static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs) return ipvs->sysctl_conn_reuse_mode; } +static inline int sysctl_schedule_icmp(struct netns_ipvs *ipvs) +{ + return ipvs->sysctl_schedule_icmp; +} + #else static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) @@ -1187,6 +1193,11 @@ static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs) return 1; } +static inline int sysctl_schedule_icmp(struct netns_ipvs *ipvs) +{ + return 0; +} + #endif /* IPVS core functions diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 1a23e91d50d8..31d80e203863 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1844,6 +1844,12 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "schedule_icmp", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -3895,7 +3901,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) tbl[idx++].data = &ipvs->sysctl_backup_only; ipvs->sysctl_conn_reuse_mode = 1; tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode; - + tbl[idx++].data = &ipvs->sysctl_schedule_icmp; ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); if (ipvs->sysctl_hdr == NULL) { From 3481894fcb0d7d3c226618e6b73a27796a7a3eca Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Wed, 26 Aug 2015 09:40:35 -0700 Subject: [PATCH 08/30] ipvs: Use outer header in ip_vs_bypass_xmit_v6 The ip_vs_iphdr may refer to an internal header, so use the outer one instead. Signed-off-by: Alex Gartrell Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_xmit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 9a26f2ea86d9..af5e9d3b4de9 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -656,10 +656,12 @@ int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { + struct ipv6hdr *iph = ipv6_hdr(skb); + EnterFunction(10); rcu_read_lock(); - if (__ip_vs_get_out_rt_v6(cp->af, skb, NULL, &ipvsh->daddr.in6, NULL, + if (__ip_vs_get_out_rt_v6(cp->af, skb, NULL, &iph->daddr, NULL, ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0) goto tx_error; From 1471f35efa86407fc180ca6d55363c684d166ef6 Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Wed, 26 Aug 2015 09:40:36 -0700 Subject: [PATCH 09/30] ipvs: sh: support scheduling icmp/inverse packets consistently "source_hash" the dest fields if it's an inverse packet. Signed-off-by: Alex Gartrell Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_sh.c | 47 ++++++++++++++++------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 98a13433b68c..1e373a5e44e3 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -280,35 +280,29 @@ static int ip_vs_sh_dest_changed(struct ip_vs_service *svc, static inline __be16 ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph) { - __be16 port; - struct tcphdr _tcph, *th; - struct udphdr _udph, *uh; - sctp_sctphdr_t _sctph, *sh; + __be16 _ports[2], *ports; + /* At this point we know that we have a valid packet of some kind. + * Because ICMP packets are only guaranteed to have the first 8 + * bytes, let's just grab the ports. Fortunately they're in the + * same position for all three of the protocols we care about. + */ switch (iph->protocol) { case IPPROTO_TCP: - th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); - if (unlikely(th == NULL)) - return 0; - port = th->source; - break; case IPPROTO_UDP: - uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); - if (unlikely(uh == NULL)) - return 0; - port = uh->source; - break; case IPPROTO_SCTP: - sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); - if (unlikely(sh == NULL)) + ports = skb_header_pointer(skb, iph->len, sizeof(_ports), + &_ports); + if (unlikely(!ports)) return 0; - port = sh->source; - break; - default: - port = 0; - } - return port; + if (likely(!ip_vs_iph_inverse(iph))) + return ports[0]; + else + return ports[1]; + default: + return 0; + } } @@ -322,6 +316,9 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, struct ip_vs_dest *dest; struct ip_vs_sh_state *s; __be16 port = 0; + const union nf_inet_addr *hash_addr; + + hash_addr = ip_vs_iph_inverse(iph) ? &iph->daddr : &iph->saddr; IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); @@ -331,9 +328,9 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, s = (struct ip_vs_sh_state *) svc->sched_data; if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK) - dest = ip_vs_sh_get_fallback(svc, s, &iph->saddr, port); + dest = ip_vs_sh_get_fallback(svc, s, hash_addr, port); else - dest = ip_vs_sh_get(svc, s, &iph->saddr, port); + dest = ip_vs_sh_get(svc, s, hash_addr, port); if (!dest) { ip_vs_scheduler_err(svc, "no destination available"); @@ -341,7 +338,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, } IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n", - IP_VS_DBG_ADDR(svc->af, &iph->saddr), + IP_VS_DBG_ADDR(svc->af, hash_addr), IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port)); From 6044eeffafbe35154c5d3b04b73e8938a62e5d39 Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Wed, 26 Aug 2015 09:40:37 -0700 Subject: [PATCH 10/30] ipvs: attempt to schedule icmp packets Invoke the try_to_schedule logic from the icmp path and update it to the appropriate ip_vs_conn_put function. The schedule functions have been updated to reject the packets immediately for now. Signed-off-by: Alex Gartrell Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 45 +++++++++++++++++++++------ net/netfilter/ipvs/ip_vs_proto_sctp.c | 6 ++++ net/netfilter/ipvs/ip_vs_proto_tcp.c | 7 +++++ net/netfilter/ipvs/ip_vs_proto_udp.c | 6 ++++ 4 files changed, 55 insertions(+), 9 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 6465e7b3e891..99be6801c795 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1409,7 +1409,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; unsigned int offset, offset2, ihl, verdict; - bool ipip; + bool ipip, new_cp = false; *related = 1; @@ -1487,8 +1487,17 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) * For IPIP this is error for request, not for reply. */ cp = pp->conn_in_get(AF_INET, skb, &ciph); - if (!cp) - return NF_ACCEPT; + + if (!cp) { + int v; + + if (!sysctl_schedule_icmp(net_ipvs(net))) + return NF_ACCEPT; + + if (!ip_vs_try_to_schedule(AF_INET, skb, pd, &v, &cp, &ciph)) + return v; + new_cp = true; + } verdict = NF_DROP; @@ -1565,7 +1574,10 @@ ignore_ipip: verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum, &ciph); out: - __ip_vs_conn_put(cp); + if (likely(!new_cp)) + __ip_vs_conn_put(cp); + else + ip_vs_conn_put(cp); return verdict; } @@ -1581,6 +1593,7 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; unsigned int offset, verdict; + bool new_cp = false; *related = 1; @@ -1631,13 +1644,23 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, */ cp = pp->conn_in_get(AF_INET6, skb, &ciph); - if (!cp) - return NF_ACCEPT; + if (!cp) { + int v; + + if (!sysctl_schedule_icmp(net_ipvs(net))) + return NF_ACCEPT; + + if (!ip_vs_try_to_schedule(AF_INET6, skb, pd, &v, &cp, &ciph)) + return v; + + new_cp = true; + } + /* VS/TUN, VS/DR and LOCALNODE just let it go */ if ((hooknum == NF_INET_LOCAL_OUT) && (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)) { - __ip_vs_conn_put(cp); - return NF_ACCEPT; + verdict = NF_ACCEPT; + goto out; } /* do the statistics and put it back */ @@ -1651,7 +1674,11 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset, hooknum, &ciph); - __ip_vs_conn_put(cp); +out: + if (likely(!new_cp)) + __ip_vs_conn_put(cp); + else + ip_vs_conn_put(cp); return verdict; } diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 5b84c0b56642..cd2984f3dad7 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -19,6 +19,12 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, sctp_chunkhdr_t _schunkh, *sch; sctp_sctphdr_t *sh, _sctph; + if (ip_vs_iph_icmp(iph)) { + /* TEMPORARY - do not schedule icmp yet */ + *verdict = NF_ACCEPT; + return 0; + } + sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); if (sh == NULL) { *verdict = NF_DROP; diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 8e92beb0cca9..dbc707514f29 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -41,6 +41,12 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, struct tcphdr _tcph, *th; struct netns_ipvs *ipvs; + if (ip_vs_iph_icmp(iph)) { + /* TEMPORARY - do not schedule icmp yet */ + *verdict = NF_ACCEPT; + return 0; + } + th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); if (th == NULL) { *verdict = NF_DROP; @@ -48,6 +54,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, } net = skb_net(skb); ipvs = net_ipvs(net); + /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ rcu_read_lock(); if ((th->syn || sysctl_sloppy_tcp(ipvs)) && !th->rst && diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index b62a3c0ff9bf..1403be250988 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -37,6 +37,12 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, struct ip_vs_service *svc; struct udphdr _udph, *uh; + if (ip_vs_iph_icmp(iph)) { + /* TEMPORARY - do not schedule icmp yet */ + *verdict = NF_ACCEPT; + return 0; + } + /* IPv6 fragments, only first fragment will hit this */ uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); if (uh == NULL) { From 89621f31d18b81a6c66a97fc2a80b3b6e5937a81 Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Wed, 26 Aug 2015 09:40:38 -0700 Subject: [PATCH 11/30] ipvs: ensure that ICMP cannot be sent in reply to ICMP Check the header for icmp before sending a PACKET_TOO_BIG Signed-off-by: Alex Gartrell Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_xmit.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index af5e9d3b4de9..c5be055ae32e 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -224,7 +224,7 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode, if (!skb->dev) skb->dev = net->loopback_dev; /* only send ICMP too big on first fragment */ - if (!ipvsh->fragoffs) + if (!ipvsh->fragoffs && !ip_vs_iph_icmp(ipvsh)) icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG(1, "frag needed for %pI6c\n", &ipv6_hdr(skb)->saddr); @@ -242,7 +242,8 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode, return true; if (unlikely(ip_hdr(skb)->frag_off & htons(IP_DF) && - skb->len > mtu && !skb_is_gso(skb))) { + skb->len > mtu && !skb_is_gso(skb) && + !ip_vs_iph_icmp(ipvsh))) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG(1, "frag needed for %pI4\n", From 8f88ea68e6146ff2d0df263b1f3e6e655e69ba98 Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Wed, 26 Aug 2015 09:40:39 -0700 Subject: [PATCH 12/30] ipvs: support scheduling inverse and icmp TCP packets In the event of an icmp packet, take only the ports instead of trying to grab the full header. In the event of an inverse packet, use the source address and port. Signed-off-by: Alex Gartrell Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_proto_tcp.c | 45 +++++++++++++++++++--------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index dbc707514f29..8f43cf6044e9 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -40,26 +40,43 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, struct ip_vs_service *svc; struct tcphdr _tcph, *th; struct netns_ipvs *ipvs; + __be16 _ports[2], *ports = NULL; - if (ip_vs_iph_icmp(iph)) { - /* TEMPORARY - do not schedule icmp yet */ - *verdict = NF_ACCEPT; - return 0; - } - - th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); - if (th == NULL) { - *verdict = NF_DROP; - return 0; - } net = skb_net(skb); ipvs = net_ipvs(net); + /* In the event of icmp, we're only guaranteed to have the first 8 + * bytes of the transport header, so we only check the rest of the + * TCP packet for non-ICMP packets + */ + if (likely(!ip_vs_iph_icmp(iph))) { + th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); + if (th) { + if (th->rst || !(sysctl_sloppy_tcp(ipvs) || th->syn)) + return 1; + ports = &th->source; + } + } else { + ports = skb_header_pointer( + skb, iph->len, sizeof(_ports), &_ports); + } + + if (!ports) { + *verdict = NF_DROP; + return 0; + } + /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ rcu_read_lock(); - if ((th->syn || sysctl_sloppy_tcp(ipvs)) && !th->rst && - (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, - &iph->daddr, th->dest))) { + + if (likely(!ip_vs_iph_inverse(iph))) + svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, + &iph->daddr, ports[1]); + else + svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, + &iph->saddr, ports[0]); + + if (svc) { int ignored; if (ip_vs_todrop(ipvs)) { From 2b0f39ef3d8951036d778ddee503e2fdf8e4ff4f Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Wed, 26 Aug 2015 09:40:40 -0700 Subject: [PATCH 13/30] ipvs: support scheduling inverse and icmp UDP packets In the event of an icmp packet, take only the ports instead of trying to grab the full header. In the event of an inverse packet, use the source address and port. Signed-off-by: Alex Gartrell Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_proto_udp.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 1403be250988..f3aa821efb89 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -36,23 +36,32 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, struct net *net; struct ip_vs_service *svc; struct udphdr _udph, *uh; + __be16 _ports[2], *ports = NULL; - if (ip_vs_iph_icmp(iph)) { - /* TEMPORARY - do not schedule icmp yet */ - *verdict = NF_ACCEPT; - return 0; + if (likely(!ip_vs_iph_icmp(iph))) { + /* IPv6 fragments, only first fragment will hit this */ + uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); + if (uh) + ports = &uh->source; + } else { + ports = skb_header_pointer( + skb, iph->len, sizeof(_ports), &_ports); } - /* IPv6 fragments, only first fragment will hit this */ - uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); - if (uh == NULL) { + if (!ports) { *verdict = NF_DROP; return 0; } + net = skb_net(skb); rcu_read_lock(); - svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, - &iph->daddr, uh->dest); + if (likely(!ip_vs_iph_inverse(iph))) + svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, + &iph->daddr, ports[1]); + else + svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, + &iph->saddr, ports[0]); + if (svc) { int ignored; From 5e26b1b3abce05c177feb589260031519a1bc7b1 Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Wed, 26 Aug 2015 09:40:41 -0700 Subject: [PATCH 14/30] ipvs: support scheduling inverse and icmp SCTP packets In the event of an icmp packet, take only the ports instead of trying to grab the full header. In the event of an inverse packet, use the source address and port. Signed-off-by: Alex Gartrell Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_proto_sctp.c | 38 ++++++++++++++++----------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index cd2984f3dad7..2026fca7e1c3 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -18,22 +18,24 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, struct netns_ipvs *ipvs; sctp_chunkhdr_t _schunkh, *sch; sctp_sctphdr_t *sh, _sctph; + __be16 _ports[2], *ports = NULL; - if (ip_vs_iph_icmp(iph)) { - /* TEMPORARY - do not schedule icmp yet */ - *verdict = NF_ACCEPT; - return 0; + if (likely(!ip_vs_iph_icmp(iph))) { + sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); + if (sh) { + sch = skb_header_pointer( + skb, iph->len + sizeof(sctp_sctphdr_t), + sizeof(_schunkh), &_schunkh); + if (sch && (sch->type == SCTP_CID_INIT || + sysctl_sloppy_sctp(ipvs))) + ports = &sh->source; + } + } else { + ports = skb_header_pointer( + skb, iph->len, sizeof(_ports), &_ports); } - sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); - if (sh == NULL) { - *verdict = NF_DROP; - return 0; - } - - sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t), - sizeof(_schunkh), &_schunkh); - if (sch == NULL) { + if (!ports) { *verdict = NF_DROP; return 0; } @@ -41,9 +43,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, net = skb_net(skb); ipvs = net_ipvs(net); rcu_read_lock(); - if ((sch->type == SCTP_CID_INIT || sysctl_sloppy_sctp(ipvs)) && - (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, - &iph->daddr, sh->dest))) { + if (likely(!ip_vs_iph_inverse(iph))) + svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, + &iph->daddr, ports[1]); + else + svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, + &iph->saddr, ports[0]); + if (svc) { int ignored; if (ip_vs_todrop(ipvs)) { From 4e478098ac0ac1b6ef9a70fcdc2ec8b93f1b59a1 Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Mon, 14 Sep 2015 23:23:05 -0700 Subject: [PATCH 15/30] ipvs: add sysctl to ignore tunneled packets This is a way to avoid nasty routing loops when multiple ipvs instances can forward to eachother. Signed-off-by: Alex Gartrell Signed-off-by: Simon Horman --- Documentation/networking/ipvs-sysctl.txt | 10 ++++++++++ include/net/ip_vs.h | 11 +++++++++++ net/netfilter/ipvs/ip_vs_core.c | 10 +++++++++- net/netfilter/ipvs/ip_vs_ctl.c | 7 +++++++ 4 files changed, 37 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/ipvs-sysctl.txt b/Documentation/networking/ipvs-sysctl.txt index 3ba709531adb..e6b1c025fdd8 100644 --- a/Documentation/networking/ipvs-sysctl.txt +++ b/Documentation/networking/ipvs-sysctl.txt @@ -157,6 +157,16 @@ expire_quiescent_template - BOOLEAN persistence template if it is to be used to schedule a new connection and the destination server is quiescent. +ignore_tunneled - BOOLEAN + 0 - disabled (default) + not 0 - enabled + + If set, ipvs will set the ipvs_property on all packets which are of + unrecognized protocols. This prevents us from routing tunneled + protocols like ipip, which is useful to prevent rescheduling + packets that have been tunneled to the ipvs host (i.e. to prevent + ipvs routing loops when ipvs is also acting as a real server). + nat_icmp_send - BOOLEAN 0 - disabled (default) not 0 - enabled diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 47677f0493c7..1096a71ab6ed 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -998,6 +998,7 @@ struct netns_ipvs { int sysctl_backup_only; int sysctl_conn_reuse_mode; int sysctl_schedule_icmp; + int sysctl_ignore_tunneled; /* ip_vs_lblc */ int sysctl_lblc_expiration; @@ -1121,6 +1122,11 @@ static inline int sysctl_schedule_icmp(struct netns_ipvs *ipvs) return ipvs->sysctl_schedule_icmp; } +static inline int sysctl_ignore_tunneled(struct netns_ipvs *ipvs) +{ + return ipvs->sysctl_ignore_tunneled; +} + #else static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) @@ -1198,6 +1204,11 @@ static inline int sysctl_schedule_icmp(struct netns_ipvs *ipvs) return 0; } +static inline int sysctl_ignore_tunneled(struct netns_ipvs *ipvs) +{ + return 0; +} + #endif /* IPVS core functions diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 99be6801c795..453972c6909e 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1760,8 +1760,16 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) /* Protocol supported? */ pd = ip_vs_proto_data_get(net, iph.protocol); - if (unlikely(!pd)) + if (unlikely(!pd)) { + /* The only way we'll see this packet again is if it's + * encapsulated, so mark it with ipvs_property=1 so we + * skip it if we're ignoring tunneled packets + */ + if (sysctl_ignore_tunneled(ipvs)) + skb->ipvs_property = 1; + return NF_ACCEPT; + } pp = pd->pp; /* * Check if the packet belongs to an existing connection entry diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 31d80e203863..7338827ee5e9 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1850,6 +1850,12 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "ignore_tunneled", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -3902,6 +3908,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) ipvs->sysctl_conn_reuse_mode = 1; tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode; tbl[idx++].data = &ipvs->sysctl_schedule_icmp; + tbl[idx++].data = &ipvs->sysctl_ignore_tunneled; ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); if (ipvs->sysctl_hdr == NULL) { From 97b59c3a91d5ee4777658ff2136d1fdf13bd23d0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:32:54 -0500 Subject: [PATCH 16/30] netfilter: ebtables: Simplify the arguments to ebt_do_table Nearly everything thing of interest to ebt_do_table is already present in nf_hook_state. Simplify ebt_do_table by just passing in the skb, nf_hook_state, and the table. This make the code easier to read and maintenance easier. To support this create an nf_hook_state on the stack in ebt_broute (the only caller without a nf_hook_state already available). This new nf_hook_state adds no new computations to ebt_broute, but does use a few more bytes of stack. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge/ebtables.h | 6 +++--- net/bridge/netfilter/ebtable_broute.c | 8 ++++++-- net/bridge/netfilter/ebtable_filter.c | 6 ++---- net/bridge/netfilter/ebtable_nat.c | 6 ++---- net/bridge/netfilter/ebtables.c | 13 +++++++------ 5 files changed, 20 insertions(+), 19 deletions(-) diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 8ca6d6464ea3..2ea517c7c6b9 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -111,9 +111,9 @@ struct ebt_table { extern struct ebt_table *ebt_register_table(struct net *net, const struct ebt_table *table); extern void ebt_unregister_table(struct net *net, struct ebt_table *table); -extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - struct ebt_table *table); +extern unsigned int ebt_do_table(struct sk_buff *skb, + const struct nf_hook_state *state, + struct ebt_table *table); /* Used in the kernel match() functions */ #define FWINV(bool,invflg) ((bool) ^ !!(info->invflags & invflg)) diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index d2cdf5d6e98c..ec94c6f1ae88 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -50,10 +50,14 @@ static const struct ebt_table broute_table = { static int ebt_broute(struct sk_buff *skb) { + struct nf_hook_state state; int ret; - ret = ebt_do_table(NF_BR_BROUTING, skb, skb->dev, NULL, - dev_net(skb->dev)->xt.broute_table); + nf_hook_state_init(&state, NULL, NF_BR_BROUTING, INT_MIN, + NFPROTO_BRIDGE, skb->dev, NULL, NULL, + dev_net(skb->dev), NULL); + + ret = ebt_do_table(skb, &state, state.net->xt.broute_table); if (ret == NF_DROP) return 1; /* route it */ return 0; /* bridge it */ diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index ab20d6ed6e2f..118ce40ac181 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -60,16 +60,14 @@ static unsigned int ebt_in_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return ebt_do_table(ops->hooknum, skb, state->in, state->out, - state->net->xt.frame_filter); + return ebt_do_table(skb, state, state->net->xt.frame_filter); } static unsigned int ebt_out_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return ebt_do_table(ops->hooknum, skb, state->in, state->out, - state->net->xt.frame_filter); + return ebt_do_table(skb, state, state->net->xt.frame_filter); } static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index ad81a5a65644..56c3329d6c37 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -60,16 +60,14 @@ static unsigned int ebt_nat_in(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return ebt_do_table(ops->hooknum, skb, state->in, state->out, - state->net->xt.frame_nat); + return ebt_do_table(skb, state, state->net->xt.frame_nat); } static unsigned int ebt_nat_out(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return ebt_do_table(ops->hooknum, skb, state->in, state->out, - state->net->xt.frame_nat); + return ebt_do_table(skb, state, state->net->xt.frame_nat); } static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 48b6b01295de..8d5a3975b963 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -183,10 +183,11 @@ struct ebt_entry *ebt_next_entry(const struct ebt_entry *entry) } /* Do some firewalling */ -unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - struct ebt_table *table) +unsigned int ebt_do_table(struct sk_buff *skb, + const struct nf_hook_state *state, + struct ebt_table *table) { + unsigned int hook = state->hook; int i, nentries; struct ebt_entry *point; struct ebt_counter *counter_base, *cb_base; @@ -199,8 +200,8 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, struct xt_action_param acpar; acpar.family = NFPROTO_BRIDGE; - acpar.in = in; - acpar.out = out; + acpar.in = state->in; + acpar.out = state->out; acpar.hotdrop = false; acpar.hooknum = hook; @@ -220,7 +221,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, base = private->entries; i = 0; while (i < nentries) { - if (ebt_basic_match(point, skb, in, out)) + if (ebt_basic_match(point, skb, state->in, state->out)) goto letscontinue; if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &acpar) != 0) From 6cb8ff3f1a535b1d8eb5ea318932513d08eb3da7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:32:55 -0500 Subject: [PATCH 17/30] inet netfilter: Remove hook from ip6t_do_table, arp_do_table, ipt_do_table The values of ops->hooknum and state->hook are guaraneted to be equal making the hook argument to ip6t_do_table, arp_do_table, and ipt_do_table is unnecessary. Remove the unnecessary hook argument. In the callers use state->hook instead of ops->hooknum for clarity and to reduce the number of cachelines the callers touch. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_arp/arp_tables.h | 1 - include/linux/netfilter_ipv4/ip_tables.h | 1 - include/linux/netfilter_ipv6/ip6_tables.h | 1 - net/ipv4/netfilter/arp_tables.c | 2 +- net/ipv4/netfilter/arptable_filter.c | 3 +-- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv4/netfilter/iptable_filter.c | 5 ++--- net/ipv4/netfilter/iptable_mangle.c | 12 +++++------- net/ipv4/netfilter/iptable_nat.c | 3 +-- net/ipv4/netfilter/iptable_raw.c | 5 ++--- net/ipv4/netfilter/iptable_security.c | 5 ++--- net/ipv6/netfilter/ip6_tables.c | 2 +- net/ipv6/netfilter/ip6table_filter.c | 3 +-- net/ipv6/netfilter/ip6table_mangle.c | 12 +++++------- net/ipv6/netfilter/ip6table_nat.c | 3 +-- net/ipv6/netfilter/ip6table_raw.c | 3 +-- net/ipv6/netfilter/ip6table_security.c | 3 +-- 17 files changed, 25 insertions(+), 41 deletions(-) diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index c22a7fb8d0df..6f074db2f23d 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -53,7 +53,6 @@ extern struct xt_table *arpt_register_table(struct net *net, const struct arpt_replace *repl); extern void arpt_unregister_table(struct xt_table *table); extern unsigned int arpt_do_table(struct sk_buff *skb, - unsigned int hook, const struct nf_hook_state *state, struct xt_table *table); diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 4073510da485..aa598f942c01 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -64,7 +64,6 @@ struct ipt_error { extern void *ipt_alloc_initial_table(const struct xt_table *); extern unsigned int ipt_do_table(struct sk_buff *skb, - unsigned int hook, const struct nf_hook_state *state, struct xt_table *table); diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index b40d2b635778..0f76e5c674f9 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -30,7 +30,6 @@ extern struct xt_table *ip6t_register_table(struct net *net, const struct ip6t_replace *repl); extern void ip6t_unregister_table(struct net *net, struct xt_table *table); extern unsigned int ip6t_do_table(struct sk_buff *skb, - unsigned int hook, const struct nf_hook_state *state, struct xt_table *table); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 8f87fc38ccde..10eb2b297450 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -247,10 +247,10 @@ struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry) } unsigned int arpt_do_table(struct sk_buff *skb, - unsigned int hook, const struct nf_hook_state *state, struct xt_table *table) { + unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); unsigned int verdict = NF_DROP; const struct arphdr *arp; diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index d217e4c19645..1352e12d4068 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -30,8 +30,7 @@ static unsigned int arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return arpt_do_table(skb, ops->hooknum, state, - state->net->ipv4.arptable_filter); + return arpt_do_table(skb, state, state->net->ipv4.arptable_filter); } static struct nf_hook_ops *arpfilter_ops __read_mostly; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 5d514eac4c31..2b049e135de8 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -285,10 +285,10 @@ struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry) /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int ipt_do_table(struct sk_buff *skb, - unsigned int hook, const struct nf_hook_state *state, struct xt_table *table) { + unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); const struct iphdr *ip; /* Initializing verdict to NF_DROP keeps gcc happy. */ diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 32feff32b116..02d4c5395d6e 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -36,14 +36,13 @@ static unsigned int iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - if (ops->hooknum == NF_INET_LOCAL_OUT && + if (state->hook == NF_INET_LOCAL_OUT && (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr))) /* root is playing with raw sockets. */ return NF_ACCEPT; - return ipt_do_table(skb, ops->hooknum, state, - state->net->ipv4.iptable_filter); + return ipt_do_table(skb, state, state->net->ipv4.iptable_filter); } static struct nf_hook_ops *filter_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 4a5150fc9510..dc2ff6884999 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -58,8 +58,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) daddr = iph->daddr; tos = iph->tos; - ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, state, - state->net->ipv4.iptable_mangle); + ret = ipt_do_table(skb, state, state->net->ipv4.iptable_mangle); /* Reroute for ANY change. */ if (ret != NF_DROP && ret != NF_STOLEN) { iph = ip_hdr(skb); @@ -83,14 +82,13 @@ iptable_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - if (ops->hooknum == NF_INET_LOCAL_OUT) + if (state->hook == NF_INET_LOCAL_OUT) return ipt_mangle_out(skb, state); - if (ops->hooknum == NF_INET_POST_ROUTING) - return ipt_do_table(skb, ops->hooknum, state, + if (state->hook == NF_INET_POST_ROUTING) + return ipt_do_table(skb, state, state->net->ipv4.iptable_mangle); /* PREROUTING/INPUT/FORWARD: */ - return ipt_do_table(skb, ops->hooknum, state, - state->net->ipv4.iptable_mangle); + return ipt_do_table(skb, state, state->net->ipv4.iptable_mangle); } static struct nf_hook_ops *mangle_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 4f4c64f81169..8ff63ac1f0d6 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -33,8 +33,7 @@ static unsigned int iptable_nat_do_chain(const struct nf_hook_ops *ops, const struct nf_hook_state *state, struct nf_conn *ct) { - return ipt_do_table(skb, ops->hooknum, state, - state->net->ipv4.nat_table); + return ipt_do_table(skb, state, state->net->ipv4.nat_table); } static unsigned int iptable_nat_ipv4_fn(const struct nf_hook_ops *ops, diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 20126e469ffb..bbb0523d87de 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -23,14 +23,13 @@ static unsigned int iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - if (ops->hooknum == NF_INET_LOCAL_OUT && + if (state->hook == NF_INET_LOCAL_OUT && (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr))) /* root is playing with raw sockets. */ return NF_ACCEPT; - return ipt_do_table(skb, ops->hooknum, state, - state->net->ipv4.iptable_raw); + return ipt_do_table(skb, state, state->net->ipv4.iptable_raw); } static struct nf_hook_ops *rawtable_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 82fefd609b85..b92417038705 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -40,14 +40,13 @@ static unsigned int iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - if (ops->hooknum == NF_INET_LOCAL_OUT && + if (state->hook == NF_INET_LOCAL_OUT && (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr))) /* Somebody is playing with raw sockets. */ return NF_ACCEPT; - return ipt_do_table(skb, ops->hooknum, state, - state->net->ipv4.iptable_security); + return ipt_do_table(skb, state, state->net->ipv4.iptable_security); } static struct nf_hook_ops *sectbl_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index cd9b401231d3..da6446b6e3f9 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -314,10 +314,10 @@ ip6t_next_entry(const struct ip6t_entry *entry) /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int ip6t_do_table(struct sk_buff *skb, - unsigned int hook, const struct nf_hook_state *state, struct xt_table *table) { + unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); /* Initializing verdict to NF_DROP keeps gcc happy. */ unsigned int verdict = NF_DROP; diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 2449005fb5dc..a7327f61b90c 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -35,8 +35,7 @@ static unsigned int ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip6t_do_table(skb, ops->hooknum, state, - state->net->ipv6.ip6table_filter); + return ip6t_do_table(skb, state, state->net->ipv6.ip6table_filter); } static struct nf_hook_ops *filter_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index a46dbf097d29..c2e061dcedf3 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -57,8 +57,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u_int32_t *)ipv6_hdr(skb)); - ret = ip6t_do_table(skb, NF_INET_LOCAL_OUT, state, - state->net->ipv6.ip6table_mangle); + ret = ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle); if (ret != NF_DROP && ret != NF_STOLEN && (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) || @@ -79,14 +78,13 @@ static unsigned int ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - if (ops->hooknum == NF_INET_LOCAL_OUT) + if (state->hook == NF_INET_LOCAL_OUT) return ip6t_mangle_out(skb, state); - if (ops->hooknum == NF_INET_POST_ROUTING) - return ip6t_do_table(skb, ops->hooknum, state, + if (state->hook == NF_INET_POST_ROUTING) + return ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle); /* INPUT/FORWARD */ - return ip6t_do_table(skb, ops->hooknum, state, - state->net->ipv6.ip6table_mangle); + return ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle); } static struct nf_hook_ops *mangle_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index a56451de127f..efa6754c4d06 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -35,8 +35,7 @@ static unsigned int ip6table_nat_do_chain(const struct nf_hook_ops *ops, const struct nf_hook_state *state, struct nf_conn *ct) { - return ip6t_do_table(skb, ops->hooknum, state, - state->net->ipv6.ip6table_nat); + return ip6t_do_table(skb, state, state->net->ipv6.ip6table_nat); } static unsigned int ip6table_nat_fn(const struct nf_hook_ops *ops, diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 18e831e35782..fac6ad7c0a7c 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -22,8 +22,7 @@ static unsigned int ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip6t_do_table(skb, ops->hooknum, state, - state->net->ipv6.ip6table_raw); + return ip6t_do_table(skb, state, state->net->ipv6.ip6table_raw); } static struct nf_hook_ops *rawtable_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 83bc96ae5d73..96c94fc240c8 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -39,8 +39,7 @@ static unsigned int ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip6t_do_table(skb, ops->hooknum, state, - state->net->ipv6.ip6table_security); + return ip6t_do_table(skb, state, state->net->ipv6.ip6table_security); } static struct nf_hook_ops *sectbl_ops __read_mostly; From 082a758f042e1c1eb241bfc2308ddc2b4ef6840d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:32:56 -0500 Subject: [PATCH 18/30] inet netfilter: Prefer state->hook to ops->hooknum The values of nf_hook_state.hook and nf_hook_ops.hooknum must be the same by definition. We are more likely to access the fields in nf_hook_state over the fields in nf_hook_ops so with a little luck this results in fewer cache line misses, and slightly more consistent code. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 4 ++-- net/ipv4/netfilter/nf_defrag_ipv4.c | 2 +- net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 14 +++++++------- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 4 ++-- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 4 ++-- net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 14 +++++++------- 6 files changed, 21 insertions(+), 21 deletions(-) diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 9564684876c9..15749cc5cf2b 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -147,7 +147,7 @@ static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_conntrack_in(state->net, PF_INET, ops->hooknum, skb); + return nf_conntrack_in(state->net, PF_INET, state->hook, skb); } static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops, @@ -158,7 +158,7 @@ static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops, if (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - return nf_conntrack_in(state->net, PF_INET, ops->hooknum, skb); + return nf_conntrack_in(state->net, PF_INET, state->hook, skb); } /* Connection tracking may drop packets, but never alters them, so diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 9306ec4fab41..8aea536d2e83 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -83,7 +83,7 @@ static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops, /* Gather fragments. */ if (ip_is_fragment(ip_hdr(skb))) { enum ip_defrag_users user = - nf_ct_defrag_user(ops->hooknum, skb); + nf_ct_defrag_user(state->hook, skb); if (nf_ct_ipv4_gather_frags(skb, user)) return NF_STOLEN; diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index 22f4579b0c2a..16da45a76dac 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -266,7 +266,7 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, enum ip_conntrack_info ctinfo; struct nf_conn_nat *nat; /* maniptype == SRC for postrouting. */ - enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); + enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook); /* We never see fragments: conntrack defrags on pre-routing * and local-out, and nf_nat_out protects post-routing. @@ -295,7 +295,7 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, case IP_CT_RELATED_REPLY: if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, - ops->hooknum)) + state->hook)) return NF_DROP; else return NF_ACCEPT; @@ -312,17 +312,17 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, if (ret != NF_ACCEPT) return ret; - if (nf_nat_initialized(ct, HOOK2MANIP(ops->hooknum))) + if (nf_nat_initialized(ct, HOOK2MANIP(state->hook))) break; - ret = nf_nat_alloc_null_binding(ct, ops->hooknum); + ret = nf_nat_alloc_null_binding(ct, state->hook); if (ret != NF_ACCEPT) return ret; } else { pr_debug("Already setup manip %s for ct %p\n", maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", ct); - if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, + if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out)) goto oif_changed; } @@ -332,11 +332,11 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out)) + if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out)) goto oif_changed; } - return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); + return nf_nat_packet(ct, ctinfo, state->hook, skb); oif_changed: nf_ct_kill_acct(ct, ctinfo, skb); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 1ef1b79def56..339be1d59afc 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -169,7 +169,7 @@ static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_conntrack_in(state->net, PF_INET6, ops->hooknum, skb); + return nf_conntrack_in(state->net, PF_INET6, state->hook, skb); } static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops, @@ -181,7 +181,7 @@ static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops, net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } - return nf_conntrack_in(state->net, PF_INET6, ops->hooknum, skb); + return nf_conntrack_in(state->net, PF_INET6, state->hook, skb); } static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index 6b576be3c83e..a9c08520596b 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -63,7 +63,7 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, return NF_ACCEPT; #endif - reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(ops->hooknum, skb)); + reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(state->hook, skb)); /* queued */ if (reasm == NULL) return NF_STOLEN; @@ -74,7 +74,7 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, nf_ct_frag6_consume_orig(reasm); - NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, state->net, state->sk, reasm, + NF_HOOK_THRESH(NFPROTO_IPV6, state->hook, state->net, state->sk, reasm, state->in, state->out, state->okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 70fbaed49edb..8bc94907dbd9 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -272,7 +272,7 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, struct nf_conn *ct; enum ip_conntrack_info ctinfo; struct nf_conn_nat *nat; - enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); + enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook); __be16 frag_off; int hdrlen; u8 nexthdr; @@ -303,7 +303,7 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, - ops->hooknum, + state->hook, hdrlen)) return NF_DROP; else @@ -321,17 +321,17 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, if (ret != NF_ACCEPT) return ret; - if (nf_nat_initialized(ct, HOOK2MANIP(ops->hooknum))) + if (nf_nat_initialized(ct, HOOK2MANIP(state->hook))) break; - ret = nf_nat_alloc_null_binding(ct, ops->hooknum); + ret = nf_nat_alloc_null_binding(ct, state->hook); if (ret != NF_ACCEPT) return ret; } else { pr_debug("Already setup manip %s for ct %p\n", maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", ct); - if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out)) + if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out)) goto oif_changed; } break; @@ -340,11 +340,11 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out)) + if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out)) goto oif_changed; } - return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); + return nf_nat_packet(ct, ctinfo, state->hook, skb); oif_changed: nf_ct_kill_acct(ct, ctinfo, skb); From 6aa187f21ca2d8ade791f01fd8fab908b1f27673 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:32:57 -0500 Subject: [PATCH 19/30] netfilter: nf_tables: kill nft_pktinfo.ops - Add nft_pktinfo.pf to replace ops->pf - Add nft_pktinfo.hook to replace ops->hooknum This simplifies the code, makes it more readable, and likely reduces cache line misses. Maintainability is enhanced as the details of nft_hook_ops are of no concern to the recpients of nft_pktinfo. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 9 ++++----- include/net/netfilter/nf_tables_ipv4.h | 3 +-- include/net/netfilter/nf_tables_ipv6.h | 3 +-- net/bridge/netfilter/nf_tables_bridge.c | 16 +++++++--------- net/bridge/netfilter/nft_reject_bridge.c | 12 ++++++------ net/ipv4/netfilter/nf_tables_arp.c | 2 +- net/ipv4/netfilter/nf_tables_ipv4.c | 2 +- net/ipv4/netfilter/nft_chain_nat_ipv4.c | 2 +- net/ipv4/netfilter/nft_chain_route_ipv4.c | 2 +- net/ipv4/netfilter/nft_dup_ipv4.c | 2 +- net/ipv4/netfilter/nft_masq_ipv4.c | 2 +- net/ipv4/netfilter/nft_redir_ipv4.c | 2 +- net/ipv4/netfilter/nft_reject_ipv4.c | 5 ++--- net/ipv6/netfilter/nf_tables_ipv6.c | 2 +- net/ipv6/netfilter/nft_chain_nat_ipv6.c | 2 +- net/ipv6/netfilter/nft_chain_route_ipv6.c | 2 +- net/ipv6/netfilter/nft_dup_ipv6.c | 2 +- net/ipv6/netfilter/nft_redir_ipv6.c | 3 +-- net/ipv6/netfilter/nft_reject_ipv6.c | 5 ++--- net/netfilter/nf_tables_core.c | 2 +- net/netfilter/nf_tables_netdev.c | 16 +++++++--------- net/netfilter/nft_log.c | 2 +- net/netfilter/nft_meta.c | 4 ++-- net/netfilter/nft_queue.c | 2 +- net/netfilter/nft_reject_inet.c | 14 +++++++------- 25 files changed, 54 insertions(+), 64 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index aa8bee72c9d3..c0899f97ff8d 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -16,7 +16,8 @@ struct nft_pktinfo { struct sk_buff *skb; const struct net_device *in; const struct net_device *out; - const struct nf_hook_ops *ops; + u8 pf; + u8 hook; u8 nhoff; u8 thoff; u8 tprot; @@ -25,16 +26,14 @@ struct nft_pktinfo { }; static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, - const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { pkt->skb = skb; pkt->in = pkt->xt.in = state->in; pkt->out = pkt->xt.out = state->out; - pkt->ops = ops; - pkt->xt.hooknum = ops->hooknum; - pkt->xt.family = ops->pf; + pkt->hook = pkt->xt.hooknum = state->hook; + pkt->pf = pkt->xt.family = state->pf; } /** diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index 2df7f96902ee..ca6ef6bf775e 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -6,13 +6,12 @@ static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt, - const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { struct iphdr *ip; - nft_set_pktinfo(pkt, ops, skb, state); + nft_set_pktinfo(pkt, skb, state); ip = ip_hdr(pkt->skb); pkt->tprot = ip->protocol; diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index 97db2e3a5e65..8ad39a6a5fe1 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -6,14 +6,13 @@ static inline int nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, - const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { int protohdr, thoff = 0; unsigned short frag_off; - nft_set_pktinfo(pkt, ops, skb, state); + nft_set_pktinfo(pkt, skb, state); protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); /* If malformed, drop it */ diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index a343e62442b1..318d825e4207 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -65,27 +65,25 @@ int nft_bridge_ip6hdr_validate(struct sk_buff *skb) EXPORT_SYMBOL_GPL(nft_bridge_ip6hdr_validate); static inline void nft_bridge_set_pktinfo_ipv4(struct nft_pktinfo *pkt, - const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { if (nft_bridge_iphdr_validate(skb)) - nft_set_pktinfo_ipv4(pkt, ops, skb, state); + nft_set_pktinfo_ipv4(pkt, skb, state); else - nft_set_pktinfo(pkt, ops, skb, state); + nft_set_pktinfo(pkt, skb, state); } static inline void nft_bridge_set_pktinfo_ipv6(struct nft_pktinfo *pkt, - const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { #if IS_ENABLED(CONFIG_IPV6) if (nft_bridge_ip6hdr_validate(skb) && - nft_set_pktinfo_ipv6(pkt, ops, skb, state) == 0) + nft_set_pktinfo_ipv6(pkt, skb, state) == 0) return; #endif - nft_set_pktinfo(pkt, ops, skb, state); + nft_set_pktinfo(pkt, skb, state); } static unsigned int @@ -97,13 +95,13 @@ nft_do_chain_bridge(const struct nf_hook_ops *ops, switch (eth_hdr(skb)->h_proto) { case htons(ETH_P_IP): - nft_bridge_set_pktinfo_ipv4(&pkt, ops, skb, state); + nft_bridge_set_pktinfo_ipv4(&pkt, skb, state); break; case htons(ETH_P_IPV6): - nft_bridge_set_pktinfo_ipv6(&pkt, ops, skb, state); + nft_bridge_set_pktinfo_ipv6(&pkt, skb, state); break; default: - nft_set_pktinfo(&pkt, ops, skb, state); + nft_set_pktinfo(&pkt, skb, state); break; } diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 858d848564ee..cee92612b2cc 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -273,16 +273,16 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: nft_reject_br_send_v4_unreach(pkt->skb, pkt->in, - pkt->ops->hooknum, + pkt->hook, priv->icmp_code); break; case NFT_REJECT_TCP_RST: nft_reject_br_send_v4_tcp_reset(pkt->skb, pkt->in, - pkt->ops->hooknum); + pkt->hook); break; case NFT_REJECT_ICMPX_UNREACH: nft_reject_br_send_v4_unreach(pkt->skb, pkt->in, - pkt->ops->hooknum, + pkt->hook, nft_reject_icmp_code(priv->icmp_code)); break; } @@ -291,16 +291,16 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: nft_reject_br_send_v6_unreach(net, pkt->skb, pkt->in, - pkt->ops->hooknum, + pkt->hook, priv->icmp_code); break; case NFT_REJECT_TCP_RST: nft_reject_br_send_v6_tcp_reset(net, pkt->skb, pkt->in, - pkt->ops->hooknum); + pkt->hook); break; case NFT_REJECT_ICMPX_UNREACH: nft_reject_br_send_v6_unreach(net, pkt->skb, pkt->in, - pkt->ops->hooknum, + pkt->hook, nft_reject_icmpv6_code(priv->icmp_code)); break; } diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index 8412268bbad1..883bbf83fe09 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -21,7 +21,7 @@ nft_do_chain_arp(const struct nf_hook_ops *ops, { struct nft_pktinfo pkt; - nft_set_pktinfo(&pkt, ops, skb, state); + nft_set_pktinfo(&pkt, skb, state); return nft_do_chain(&pkt, ops); } diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index aa180d3a69a5..805be5c9fcc3 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -24,7 +24,7 @@ static unsigned int nft_do_chain_ipv4(const struct nf_hook_ops *ops, { struct nft_pktinfo pkt; - nft_set_pktinfo_ipv4(&pkt, ops, skb, state); + nft_set_pktinfo_ipv4(&pkt, skb, state); return nft_do_chain(&pkt, ops); } diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c index bf5c30ae14e4..c3ffecf28d38 100644 --- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -33,7 +33,7 @@ static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops, { struct nft_pktinfo pkt; - nft_set_pktinfo_ipv4(&pkt, ops, skb, state); + nft_set_pktinfo_ipv4(&pkt, skb, state); return nft_do_chain(&pkt, ops); } diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c index e335b0afdaf3..2a1e3d8a3e43 100644 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -37,7 +37,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - nft_set_pktinfo_ipv4(&pkt, ops, skb, state); + nft_set_pktinfo_ipv4(&pkt, skb, state); mark = skb->mark; iph = ip_hdr(skb); diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c index b45932d43b69..30bcf820e8bd 100644 --- a/net/ipv4/netfilter/nft_dup_ipv4.c +++ b/net/ipv4/netfilter/nft_dup_ipv4.c @@ -30,7 +30,7 @@ static void nft_dup_ipv4_eval(const struct nft_expr *expr, }; int oif = regs->data[priv->sreg_dev]; - nf_dup_ipv4(pkt->skb, pkt->ops->hooknum, &gw, oif); + nf_dup_ipv4(pkt->skb, pkt->hook, &gw, oif); } static int nft_dup_ipv4_init(const struct nft_ctx *ctx, diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c index 40e414c4ca56..b72ffc58e255 100644 --- a/net/ipv4/netfilter/nft_masq_ipv4.c +++ b/net/ipv4/netfilter/nft_masq_ipv4.c @@ -26,7 +26,7 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); range.flags = priv->flags; - regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, pkt->ops->hooknum, + regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, pkt->hook, &range, pkt->out); } diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c index d8d795df9c13..c09d4381427e 100644 --- a/net/ipv4/netfilter/nft_redir_ipv4.c +++ b/net/ipv4/netfilter/nft_redir_ipv4.c @@ -36,7 +36,7 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr, mr.range[0].flags |= priv->flags; regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, - pkt->ops->hooknum); + pkt->hook); } static struct nft_expr_type nft_redir_ipv4_type; diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c index b07e58b51158..c1582e03b628 100644 --- a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -27,11 +27,10 @@ static void nft_reject_ipv4_eval(const struct nft_expr *expr, switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach(pkt->skb, priv->icmp_code, - pkt->ops->hooknum); + nf_send_unreach(pkt->skb, priv->icmp_code, pkt->hook); break; case NFT_REJECT_TCP_RST: - nf_send_reset(pkt->skb, pkt->ops->hooknum); + nf_send_reset(pkt->skb, pkt->hook); break; default: break; diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index c8148ba76d1a..41340b794f9b 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -23,7 +23,7 @@ static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops, struct nft_pktinfo pkt; /* malformed packet, drop it */ - if (nft_set_pktinfo_ipv6(&pkt, ops, skb, state) < 0) + if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0) return NF_DROP; return nft_do_chain(&pkt, ops); diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c index 951bb458b7bd..e96feaefeb14 100644 --- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c @@ -31,7 +31,7 @@ static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops, { struct nft_pktinfo pkt; - nft_set_pktinfo_ipv6(&pkt, ops, skb, state); + nft_set_pktinfo_ipv6(&pkt, skb, state); return nft_do_chain(&pkt, ops); } diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index 0dafdaac5e17..d1bcd2ed7bcc 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -33,7 +33,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, u32 mark, flowlabel; /* malformed packet, drop it */ - if (nft_set_pktinfo_ipv6(&pkt, ops, skb, state) < 0) + if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0) return NF_DROP; /* save source/dest address, mark, hoplimit, flowlabel, priority */ diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c index 0eaa4f65fdea..c81204faf15d 100644 --- a/net/ipv6/netfilter/nft_dup_ipv6.c +++ b/net/ipv6/netfilter/nft_dup_ipv6.c @@ -28,7 +28,7 @@ static void nft_dup_ipv6_eval(const struct nft_expr *expr, struct in6_addr *gw = (struct in6_addr *)®s->data[priv->sreg_addr]; int oif = regs->data[priv->sreg_dev]; - nf_dup_ipv6(pkt->skb, pkt->ops->hooknum, gw, oif); + nf_dup_ipv6(pkt->skb, pkt->hook, gw, oif); } static int nft_dup_ipv6_init(const struct nft_ctx *ctx, diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c index effd393bd517..aca44e89a881 100644 --- a/net/ipv6/netfilter/nft_redir_ipv6.c +++ b/net/ipv6/netfilter/nft_redir_ipv6.c @@ -35,8 +35,7 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr, range.flags |= priv->flags; - regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range, - pkt->ops->hooknum); + regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range, pkt->hook); } static struct nft_expr_type nft_redir_ipv6_type; diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c index d0d1540ecf87..ffcac7d5da43 100644 --- a/net/ipv6/netfilter/nft_reject_ipv6.c +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -28,11 +28,10 @@ static void nft_reject_ipv6_eval(const struct nft_expr *expr, switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach6(net, pkt->skb, priv->icmp_code, - pkt->ops->hooknum); + nf_send_unreach6(net, pkt->skb, priv->icmp_code, pkt->hook); break; case NFT_REJECT_TCP_RST: - nf_send_reset6(net, pkt->skb, pkt->ops->hooknum); + nf_send_reset6(net, pkt->skb, pkt->hook); break; default: break; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 05d0b03530f6..539083099c0d 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -50,7 +50,7 @@ static void __nft_trace_packet(const struct nft_pktinfo *pkt, { struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); - nf_log_trace(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in, + nf_log_trace(net, pkt->pf, pkt->hook, pkt->skb, pkt->in, pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", chain->table->name, chain->name, comments[type], rulenum); diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 2cae4d4a03b7..db416a3396e9 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -17,13 +17,13 @@ static inline void nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt, - const struct nf_hook_ops *ops, struct sk_buff *skb, + struct sk_buff *skb, const struct nf_hook_state *state) { struct iphdr *iph, _iph; u32 len, thoff; - nft_set_pktinfo(pkt, ops, skb, state); + nft_set_pktinfo(pkt, skb, state); iph = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*iph), &_iph); @@ -48,7 +48,6 @@ nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt, static inline void __nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt, - const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -82,12 +81,11 @@ __nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt, } static inline void nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt, - const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - nft_set_pktinfo(pkt, ops, skb, state); - __nft_netdev_set_pktinfo_ipv6(pkt, ops, skb, state); + nft_set_pktinfo(pkt, skb, state); + __nft_netdev_set_pktinfo_ipv6(pkt, skb, state); } static unsigned int @@ -98,13 +96,13 @@ nft_do_chain_netdev(const struct nf_hook_ops *ops, struct sk_buff *skb, switch (eth_hdr(skb)->h_proto) { case htons(ETH_P_IP): - nft_netdev_set_pktinfo_ipv4(&pkt, ops, skb, state); + nft_netdev_set_pktinfo_ipv4(&pkt, skb, state); break; case htons(ETH_P_IPV6): - nft_netdev_set_pktinfo_ipv6(&pkt, ops, skb, state); + nft_netdev_set_pktinfo_ipv6(&pkt, skb, state); break; default: - nft_set_pktinfo(&pkt, ops, skb, state); + nft_set_pktinfo(&pkt, skb, state); break; } diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index a13d6a386d63..c7c7df85f0b7 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -33,7 +33,7 @@ static void nft_log_eval(const struct nft_expr *expr, const struct nft_log *priv = nft_expr_priv(expr); struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); - nf_log_packet(net, pkt->ops->pf, pkt->ops->hooknum, pkt->skb, pkt->in, + nf_log_packet(net, pkt->pf, pkt->hook, pkt->skb, pkt->in, pkt->out, &priv->loginfo, "%s", priv->prefix); } diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index cb2f13ebb5a6..e4ad2c24bc41 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -42,7 +42,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, *(__be16 *)dest = skb->protocol; break; case NFT_META_NFPROTO: - *dest = pkt->ops->pf; + *dest = pkt->pf; break; case NFT_META_L4PROTO: *dest = pkt->tprot; @@ -135,7 +135,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, break; } - switch (pkt->ops->pf) { + switch (pkt->pf) { case NFPROTO_IPV4: if (ipv4_is_multicast(ip_hdr(skb)->daddr)) *dest = PACKET_MULTICAST; diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index 96805d21d618..61d216eb7917 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -42,7 +42,7 @@ static void nft_queue_eval(const struct nft_expr *expr, queue = priv->queuenum + cpu % priv->queues_total; } else { queue = nfqueue_hash(pkt->skb, queue, - priv->queues_total, pkt->ops->pf, + priv->queues_total, pkt->pf, jhash_initval); } } diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index 635dbba93d01..dea6750af6ff 100644 --- a/net/netfilter/nft_reject_inet.c +++ b/net/netfilter/nft_reject_inet.c @@ -24,20 +24,20 @@ static void nft_reject_inet_eval(const struct nft_expr *expr, struct nft_reject *priv = nft_expr_priv(expr); struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); - switch (pkt->ops->pf) { + switch (pkt->pf) { case NFPROTO_IPV4: switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: nf_send_unreach(pkt->skb, priv->icmp_code, - pkt->ops->hooknum); + pkt->hook); break; case NFT_REJECT_TCP_RST: - nf_send_reset(pkt->skb, pkt->ops->hooknum); + nf_send_reset(pkt->skb, pkt->hook); break; case NFT_REJECT_ICMPX_UNREACH: nf_send_unreach(pkt->skb, nft_reject_icmp_code(priv->icmp_code), - pkt->ops->hooknum); + pkt->hook); break; } break; @@ -45,15 +45,15 @@ static void nft_reject_inet_eval(const struct nft_expr *expr, switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: nf_send_unreach6(net, pkt->skb, priv->icmp_code, - pkt->ops->hooknum); + pkt->hook); break; case NFT_REJECT_TCP_RST: - nf_send_reset6(net, pkt->skb, pkt->ops->hooknum); + nf_send_reset6(net, pkt->skb, pkt->hook); break; case NFT_REJECT_ICMPX_UNREACH: nf_send_unreach6(net, pkt->skb, nft_reject_icmpv6_code(priv->icmp_code), - pkt->ops->hooknum); + pkt->hook); break; } break; From 156c196f6038610770588a708b9e0f7df2ead74a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:32:58 -0500 Subject: [PATCH 20/30] netfilter: x_tables: Pass struct net in xt_action_param As xt_action_param lives on the stack this does not bloat any persistent data structures. This is a first step in making netfilter code that needs to know which network namespace it is executing in simpler. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 3 ++- include/net/netfilter/nf_tables.h | 1 + net/bridge/netfilter/ebtables.c | 1 + net/ipv4/netfilter/arp_tables.c | 1 + net/ipv4/netfilter/ip_tables.c | 1 + net/ipv6/netfilter/ip6_tables.c | 1 + net/sched/act_ipt.c | 1 + net/sched/em_ipset.c | 1 + 8 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index b006b719183f..c5577410c25d 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -13,6 +13,7 @@ * @target: the target extension * @matchinfo: per-match data * @targetinfo: per-target data + * @net network namespace through which the action was invoked * @in: input netdevice * @out: output netdevice * @fragoff: packet is a fragment, this is the data offset @@ -24,7 +25,6 @@ * Fields written to by extensions: * * @hotdrop: drop packet if we had inspection problems - * Network namespace obtainable using dev_net(in/out) */ struct xt_action_param { union { @@ -34,6 +34,7 @@ struct xt_action_param { union { const void *matchinfo, *targinfo; }; + struct net *net; const struct net_device *in, *out; int fragoff; unsigned int thoff; diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index c0899f97ff8d..c0516529e8a0 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -30,6 +30,7 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_state *state) { pkt->skb = skb; + pkt->xt.net = state->net; pkt->in = pkt->xt.in = state->in; pkt->out = pkt->xt.out = state->out; pkt->hook = pkt->xt.hooknum = state->hook; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 8d5a3975b963..f46ca417bf2d 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -200,6 +200,7 @@ unsigned int ebt_do_table(struct sk_buff *skb, struct xt_action_param acpar; acpar.family = NFPROTO_BRIDGE; + acpar.net = state->net; acpar.in = state->in; acpar.out = state->out; acpar.hotdrop = false; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 10eb2b297450..2dad3e1c5f11 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -285,6 +285,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, */ e = get_entry(table_base, private->hook_entry[hook]); + acpar.net = state->net; acpar.in = state->in; acpar.out = state->out; acpar.hooknum = hook; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 2b049e135de8..42d0946956db 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -315,6 +315,7 @@ ipt_do_table(struct sk_buff *skb, acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; acpar.thoff = ip_hdrlen(skb); acpar.hotdrop = false; + acpar.net = state->net; acpar.in = state->in; acpar.out = state->out; acpar.family = NFPROTO_IPV4; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index da6446b6e3f9..80e3bd72b715 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -340,6 +340,7 @@ ip6t_do_table(struct sk_buff *skb, * rule is also a fragment-specific rule, non-fragments won't * match it. */ acpar.hotdrop = false; + acpar.net = state->net; acpar.in = state->in; acpar.out = state->out; acpar.family = NFPROTO_IPV6; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 99c9cc1c7af9..d05869646515 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -189,6 +189,7 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, * worry later - danger - this API seems to have changed * from earlier kernels */ + par.net = dev_net(skb->dev); par.in = skb->dev; par.out = NULL; par.hooknum = ipt->tcfi_hook; diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c index df0328ba6a48..c66ca9400ab4 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -95,6 +95,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, if (skb->skb_iif) indev = dev_get_by_index_rcu(em->net, skb->skb_iif); + acpar.net = em->net; acpar.in = indev ? indev : dev; acpar.out = dev; From 686c9b50809dc80cba7c2e9f809471ab40bae735 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:32:59 -0500 Subject: [PATCH 21/30] netfilter: x_tables: Use par->net instead of computing from the passed net devices Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_log.c | 2 +- net/bridge/netfilter/ebt_nflog.c | 2 +- net/ipv4/netfilter/ipt_SYNPROXY.c | 2 +- net/ipv4/netfilter/ipt_rpfilter.c | 5 ++--- net/ipv6/netfilter/ip6t_REJECT.c | 2 +- net/ipv6/netfilter/ip6t_SYNPROXY.c | 2 +- net/ipv6/netfilter/ip6t_rpfilter.c | 6 +++--- net/netfilter/ipset/ip_set_core.c | 9 +++------ net/netfilter/xt_LOG.c | 2 +- net/netfilter/xt_NFLOG.c | 2 +- net/netfilter/xt_TCPMSS.c | 2 +- net/netfilter/xt_TPROXY.c | 24 ++++++++++++------------ net/netfilter/xt_addrtype.c | 4 ++-- net/netfilter/xt_connlimit.c | 2 +- net/netfilter/xt_osf.c | 2 +- net/netfilter/xt_recent.c | 2 +- net/netfilter/xt_socket.c | 14 ++++++++------ 17 files changed, 41 insertions(+), 43 deletions(-) diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 17f2e4bc2a29..0ad639a96142 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -180,7 +180,7 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_log_info *info = par->targinfo; struct nf_loginfo li; - struct net *net = dev_net(par->in ? par->in : par->out); + struct net *net = par->net; li.type = NF_LOG_TYPE_LOG; li.u.log.level = info->loglevel; diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c index 59ac7952010d..54816150608e 100644 --- a/net/bridge/netfilter/ebt_nflog.c +++ b/net/bridge/netfilter/ebt_nflog.c @@ -24,7 +24,7 @@ ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nflog_info *info = par->targinfo; struct nf_loginfo li; - struct net *net = dev_net(par->in ? par->in : par->out); + struct net *net = par->net; li.type = NF_LOG_TYPE_ULOG; li.u.ulog.copy_len = info->len; diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index f471a0628c75..dfab314981e9 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -258,7 +258,7 @@ static unsigned int synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_synproxy_info *info = par->targinfo; - struct synproxy_net *snet = synproxy_pernet(dev_net(par->in)); + struct synproxy_net *snet = synproxy_pernet(par->net); struct synproxy_options opts = {}; struct tcphdr *th, _th; diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 8618fd150c96..74dd6671b66d 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -32,12 +32,11 @@ static __be32 rpfilter_get_saddr(__be32 addr) return addr; } -static bool rpfilter_lookup_reverse(struct flowi4 *fl4, +static bool rpfilter_lookup_reverse(struct net *net, struct flowi4 *fl4, const struct net_device *dev, u8 flags) { struct fib_result res; bool dev_match; - struct net *net = dev_net(dev); int ret __maybe_unused; if (fib_lookup(net, fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE)) @@ -98,7 +97,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) flow.flowi4_tos = RT_TOS(iph->tos); flow.flowi4_scope = RT_SCOPE_UNIVERSE; - return rpfilter_lookup_reverse(&flow, par->in, info->flags) ^ invert; + return rpfilter_lookup_reverse(par->net, &flow, par->in, info->flags) ^ invert; } static int rpfilter_check(const struct xt_mtchk_param *par) diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 0ed841a3fa33..db29bbf41b59 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -39,7 +39,7 @@ static unsigned int reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct ip6t_reject_info *reject = par->targinfo; - struct net *net = dev_net((par->in != NULL) ? par->in : par->out); + struct net *net = par->net; switch (reject->with) { case IP6T_ICMP6_NO_ROUTE: diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 4c9f3e79d75f..41451809b37c 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -275,7 +275,7 @@ static unsigned int synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_synproxy_info *info = par->targinfo; - struct synproxy_net *snet = synproxy_pernet(dev_net(par->in)); + struct synproxy_net *snet = synproxy_pernet(par->net); struct synproxy_options opts = {}; struct tcphdr *th, _th; diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 790e0c6b19e1..1ee1b25df096 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -26,7 +26,7 @@ static bool rpfilter_addr_unicast(const struct in6_addr *addr) return addr_type & IPV6_ADDR_UNICAST; } -static bool rpfilter_lookup_reverse6(const struct sk_buff *skb, +static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, const struct net_device *dev, u8 flags) { struct rt6_info *rt; @@ -53,7 +53,7 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb, lookup_flags |= RT6_LOOKUP_F_IFACE; } - rt = (void *) ip6_route_lookup(dev_net(dev), &fl6, lookup_flags); + rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags); if (rt->dst.error) goto out; @@ -93,7 +93,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) if (unlikely(saddrtype == IPV6_ADDR_ANY)) return true ^ invert; /* not routable: forward path will drop it */ - return rpfilter_lookup_reverse6(skb, par->in, info->flags) ^ invert; + return rpfilter_lookup_reverse6(par->net, skb, par->in, info->flags) ^ invert; } static int rpfilter_check(const struct xt_mtchk_param *par) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 338b4047776f..69ab9c2634e1 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -519,8 +519,7 @@ int ip_set_test(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get( - dev_net(par->in ? par->in : par->out), index); + struct ip_set *set = ip_set_rcu_get(par->net, index); int ret = 0; BUG_ON(!set); @@ -558,8 +557,7 @@ int ip_set_add(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get( - dev_net(par->in ? par->in : par->out), index); + struct ip_set *set = ip_set_rcu_get(par->net, index); int ret; BUG_ON(!set); @@ -581,8 +579,7 @@ int ip_set_del(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get( - dev_net(par->in ? par->in : par->out), index); + struct ip_set *set = ip_set_rcu_get(par->net, index); int ret = 0; BUG_ON(!set); diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index c13b79440ede..1763ab82bcd7 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -33,7 +33,7 @@ log_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_log_info *loginfo = par->targinfo; struct nf_loginfo li; - struct net *net = dev_net(par->in ? par->in : par->out); + struct net *net = par->net; li.type = NF_LOG_TYPE_LOG; li.u.log.level = loginfo->level; diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index fb7497c928a0..a1fa2c800cb9 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -26,7 +26,7 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_nflog_info *info = par->targinfo; struct nf_loginfo li; - struct net *net = dev_net(par->in ? par->in : par->out); + struct net *net = par->net; li.type = NF_LOG_TYPE_ULOG; li.u.ulog.copy_len = info->len; diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 8c02501a530f..b7c43def0dc6 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -108,7 +108,7 @@ tcpmss_mangle_packet(struct sk_buff *skb, return -1; if (info->mss == XT_TCPMSS_CLAMP_PMTU) { - struct net *net = dev_net(par->in ? par->in : par->out); + struct net *net = par->net; unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family); if (dst_mtu(skb_dst(skb)) <= minlen) { diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index d0c96c5ae29a..3ab591e73ec0 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -250,8 +250,8 @@ nf_tproxy_get_sock_v6(struct net *net, const u8 protocol, * no such listener is found, or NULL if the TCP header is incomplete. */ static struct sock * -tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport, - struct sock *sk) +tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb, + __be32 laddr, __be16 lport, struct sock *sk) { const struct iphdr *iph = ip_hdr(skb); struct tcphdr _hdr, *hp; @@ -267,7 +267,7 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport, * to a listener socket if there's one */ struct sock *sk2; - sk2 = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, + sk2 = nf_tproxy_get_sock_v4(net, iph->protocol, iph->saddr, laddr ? laddr : iph->daddr, hp->source, lport ? lport : hp->dest, skb->dev, NFT_LOOKUP_LISTENER); @@ -290,7 +290,7 @@ nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) } static unsigned int -tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport, +tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport, u_int32_t mark_mask, u_int32_t mark_value) { const struct iphdr *iph = ip_hdr(skb); @@ -305,7 +305,7 @@ tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport, * addresses, this happens if the redirect already happened * and the current packet belongs to an already established * connection */ - sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, + sk = nf_tproxy_get_sock_v4(net, iph->protocol, iph->saddr, iph->daddr, hp->source, hp->dest, skb->dev, NFT_LOOKUP_ESTABLISHED); @@ -317,11 +317,11 @@ tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport, /* UDP has no TCP_TIME_WAIT state, so we never enter here */ if (sk && sk->sk_state == TCP_TIME_WAIT) /* reopening a TIME_WAIT connection needs special handling */ - sk = tproxy_handle_time_wait4(skb, laddr, lport, sk); + sk = tproxy_handle_time_wait4(net, skb, laddr, lport, sk); else if (!sk) /* no, there's no established connection, check if * there's a listener on the redirected addr/port */ - sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, + sk = nf_tproxy_get_sock_v4(net, iph->protocol, iph->saddr, laddr, hp->source, lport, skb->dev, NFT_LOOKUP_LISTENER); @@ -351,7 +351,7 @@ tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info *tgi = par->targinfo; - return tproxy_tg4(skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value); + return tproxy_tg4(par->net, skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value); } static unsigned int @@ -359,7 +359,7 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; - return tproxy_tg4(skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value); + return tproxy_tg4(par->net, skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value); } #ifdef XT_TPROXY_HAVE_IPV6 @@ -429,7 +429,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, * to a listener socket if there's one */ struct sock *sk2; - sk2 = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, + sk2 = nf_tproxy_get_sock_v6(par->net, tproto, &iph->saddr, tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr), hp->source, @@ -472,7 +472,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) * addresses, this happens if the redirect already happened * and the current packet belongs to an already established * connection */ - sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, + sk = nf_tproxy_get_sock_v6(par->net, tproto, &iph->saddr, &iph->daddr, hp->source, hp->dest, par->in, NFT_LOOKUP_ESTABLISHED); @@ -487,7 +487,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) else if (!sk) /* no there's no established connection, check if * there's a listener on the redirected addr/port */ - sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, + sk = nf_tproxy_get_sock_v6(par->net, tproto, &iph->saddr, laddr, hp->source, lport, par->in, NFT_LOOKUP_LISTENER); diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index 5b4743cc0436..11d6091991a4 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -125,7 +125,7 @@ static inline bool match_type(struct net *net, const struct net_device *dev, static bool addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = dev_net(par->in ? par->in : par->out); + struct net *net = par->net; const struct xt_addrtype_info *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); bool ret = true; @@ -143,7 +143,7 @@ addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) static bool addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = dev_net(par->in ? par->in : par->out); + struct net *net = par->net; const struct xt_addrtype_info_v1 *info = par->matchinfo; const struct iphdr *iph; const struct net_device *dev = NULL; diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 075d89d94d28..213db252e5be 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -317,7 +317,7 @@ static int count_them(struct net *net, static bool connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = dev_net(par->in ? par->in : par->out); + struct net *net = par->net; const struct xt_connlimit_info *info = par->matchinfo; union nf_inet_addr addr; struct nf_conntrack_tuple tuple; diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 0778855ea5e7..df8801e02a32 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -200,7 +200,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) unsigned char opts[MAX_IPOPTLEN]; const struct xt_osf_finger *kf; const struct xt_osf_user_finger *f; - struct net *net = dev_net(p->in ? p->in : p->out); + struct net *net = p->net; if (!info) return false; diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 45e1b30e4fb2..d725a27743a1 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -237,7 +237,7 @@ static void recent_table_flush(struct recent_table *t) static bool recent_mt(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = dev_net(par->in ? par->in : par->out); + struct net *net = par->net; struct recent_net *recent_net = recent_pernet(net); const struct xt_recent_mtinfo_v1 *info = par->matchinfo; struct recent_table *t; diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 43e26c881100..2ec08f04b816 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -143,7 +143,8 @@ static bool xt_socket_sk_is_transparent(struct sock *sk) } } -static struct sock *xt_socket_lookup_slow_v4(const struct sk_buff *skb, +static struct sock *xt_socket_lookup_slow_v4(struct net *net, + const struct sk_buff *skb, const struct net_device *indev) { const struct iphdr *iph = ip_hdr(skb); @@ -197,7 +198,7 @@ static struct sock *xt_socket_lookup_slow_v4(const struct sk_buff *skb, } #endif - return xt_socket_get_sock_v4(dev_net(skb->dev), protocol, saddr, daddr, + return xt_socket_get_sock_v4(net, protocol, saddr, daddr, sport, dport, indev); } @@ -209,7 +210,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, struct sock *sk = skb->sk; if (!sk) - sk = xt_socket_lookup_slow_v4(skb, par->in); + sk = xt_socket_lookup_slow_v4(par->net, skb, par->in); if (sk) { bool wildcard; bool transparent = true; @@ -335,7 +336,8 @@ xt_socket_get_sock_v6(struct net *net, const u8 protocol, return NULL; } -static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb, +static struct sock *xt_socket_lookup_slow_v6(struct net *net, + const struct sk_buff *skb, const struct net_device *indev) { __be16 uninitialized_var(dport), uninitialized_var(sport); @@ -371,7 +373,7 @@ static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb, return NULL; } - return xt_socket_get_sock_v6(dev_net(skb->dev), tproto, saddr, daddr, + return xt_socket_get_sock_v6(net, tproto, saddr, daddr, sport, dport, indev); } @@ -383,7 +385,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) struct sock *sk = skb->sk; if (!sk) - sk = xt_socket_lookup_slow_v6(skb, par->in); + sk = xt_socket_lookup_slow_v6(par->net, skb, par->in); if (sk) { bool wildcard; bool transparent = true; From 46448d0093ba18a212e314fd9ea029e740baa476 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:33:00 -0500 Subject: [PATCH 22/30] netfilter: nf_tables: Pass struct net in nft_pktinfo nft_pktinfo is passed on the stack so this does not bloat any in core data structures. By centrally computing this information this makes maintence of the code simpler, and understading of the code easier. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index c0516529e8a0..42e239e55aa3 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -14,6 +14,7 @@ struct nft_pktinfo { struct sk_buff *skb; + struct net *net; const struct net_device *in; const struct net_device *out; u8 pf; @@ -30,7 +31,7 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_state *state) { pkt->skb = skb; - pkt->xt.net = state->net; + pkt->net = pkt->xt.net = state->net; pkt->in = pkt->xt.in = state->in; pkt->out = pkt->xt.out = state->out; pkt->hook = pkt->xt.hooknum = state->hook; From 88182a0e0c665cbd2f01af743c7f88e494924246 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:33:01 -0500 Subject: [PATCH 23/30] netfilter: nf_tables: Use pkt->net instead of computing net from the passed net_devices Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/nft_reject_bridge.c | 13 ++++++------- net/ipv6/netfilter/nft_reject_ipv6.c | 6 +++--- net/netfilter/nf_tables_core.c | 6 ++---- net/netfilter/nft_log.c | 3 +-- net/netfilter/nft_reject_inet.c | 7 +++---- 5 files changed, 15 insertions(+), 20 deletions(-) diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index cee92612b2cc..fdba3d9fbff3 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -261,7 +261,6 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); - struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); const unsigned char *dest = eth_hdr(pkt->skb)->h_dest; if (is_broadcast_ether_addr(dest) || @@ -290,17 +289,17 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, case htons(ETH_P_IPV6): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nft_reject_br_send_v6_unreach(net, pkt->skb, pkt->in, - pkt->hook, + nft_reject_br_send_v6_unreach(pkt->net, pkt->skb, + pkt->in, pkt->hook, priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nft_reject_br_send_v6_tcp_reset(net, pkt->skb, pkt->in, - pkt->hook); + nft_reject_br_send_v6_tcp_reset(pkt->net, pkt->skb, + pkt->in, pkt->hook); break; case NFT_REJECT_ICMPX_UNREACH: - nft_reject_br_send_v6_unreach(net, pkt->skb, pkt->in, - pkt->hook, + nft_reject_br_send_v6_unreach(pkt->net, pkt->skb, + pkt->in, pkt->hook, nft_reject_icmpv6_code(priv->icmp_code)); break; } diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c index ffcac7d5da43..533cd5719c59 100644 --- a/net/ipv6/netfilter/nft_reject_ipv6.c +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -24,14 +24,14 @@ static void nft_reject_ipv6_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); - struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach6(net, pkt->skb, priv->icmp_code, pkt->hook); + nf_send_unreach6(pkt->net, pkt->skb, priv->icmp_code, + pkt->hook); break; case NFT_REJECT_TCP_RST: - nf_send_reset6(net, pkt->skb, pkt->hook); + nf_send_reset6(pkt->net, pkt->skb, pkt->hook); break; default: break; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 539083099c0d..e5c1f332e45e 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -48,9 +48,7 @@ static void __nft_trace_packet(const struct nft_pktinfo *pkt, const struct nft_chain *chain, int rulenum, enum nft_trace type) { - struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); - - nf_log_trace(net, pkt->pf, pkt->hook, pkt->skb, pkt->in, + nf_log_trace(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in, pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", chain->table->name, chain->name, comments[type], rulenum); @@ -114,7 +112,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) { const struct nft_chain *chain = ops->priv, *basechain = chain; - const struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); + const struct net *net = pkt->net; const struct nft_rule *rule; const struct nft_expr *expr, *last; struct nft_regs regs; diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index c7c7df85f0b7..319c22b4bca2 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -31,9 +31,8 @@ static void nft_log_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { const struct nft_log *priv = nft_expr_priv(expr); - struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); - nf_log_packet(net, pkt->pf, pkt->hook, pkt->skb, pkt->in, + nf_log_packet(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in, pkt->out, &priv->loginfo, "%s", priv->prefix); } diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index dea6750af6ff..0bc19f97e238 100644 --- a/net/netfilter/nft_reject_inet.c +++ b/net/netfilter/nft_reject_inet.c @@ -22,7 +22,6 @@ static void nft_reject_inet_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); - struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); switch (pkt->pf) { case NFPROTO_IPV4: @@ -44,14 +43,14 @@ static void nft_reject_inet_eval(const struct nft_expr *expr, case NFPROTO_IPV6: switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach6(net, pkt->skb, priv->icmp_code, + nf_send_unreach6(pkt->net, pkt->skb, priv->icmp_code, pkt->hook); break; case NFT_REJECT_TCP_RST: - nf_send_reset6(net, pkt->skb, pkt->hook); + nf_send_reset6(pkt->net, pkt->skb, pkt->hook); break; case NFT_REJECT_ICMPX_UNREACH: - nf_send_unreach6(net, pkt->skb, + nf_send_unreach6(pkt->net, pkt->skb, nft_reject_icmpv6_code(priv->icmp_code), pkt->hook); break; From 206e8c00752fbe9cc463184236ac64b2a532cda5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:33:02 -0500 Subject: [PATCH 24/30] netfilter: Pass net to nf_dup_ipv4 and nf_dup_ipv6 This allows them to stop guessing the network namespace with pick_net. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv4/nf_dup_ipv4.h | 2 +- include/net/netfilter/ipv6/nf_dup_ipv6.h | 2 +- net/ipv4/netfilter/nf_dup_ipv4.c | 23 ++++------------------- net/ipv4/netfilter/nft_dup_ipv4.c | 2 +- net/ipv6/netfilter/nf_dup_ipv6.c | 23 ++++------------------- net/ipv6/netfilter/nft_dup_ipv6.c | 2 +- net/netfilter/xt_TEE.c | 4 ++-- 7 files changed, 14 insertions(+), 44 deletions(-) diff --git a/include/net/netfilter/ipv4/nf_dup_ipv4.h b/include/net/netfilter/ipv4/nf_dup_ipv4.h index 42008f10dfc4..0a14733e8b82 100644 --- a/include/net/netfilter/ipv4/nf_dup_ipv4.h +++ b/include/net/netfilter/ipv4/nf_dup_ipv4.h @@ -1,7 +1,7 @@ #ifndef _NF_DUP_IPV4_H_ #define _NF_DUP_IPV4_H_ -void nf_dup_ipv4(struct sk_buff *skb, unsigned int hooknum, +void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, const struct in_addr *gw, int oif); #endif /* _NF_DUP_IPV4_H_ */ diff --git a/include/net/netfilter/ipv6/nf_dup_ipv6.h b/include/net/netfilter/ipv6/nf_dup_ipv6.h index ed6bd66fa5a0..fa6237b382a3 100644 --- a/include/net/netfilter/ipv6/nf_dup_ipv6.h +++ b/include/net/netfilter/ipv6/nf_dup_ipv6.h @@ -1,7 +1,7 @@ #ifndef _NF_DUP_IPV6_H_ #define _NF_DUP_IPV6_H_ -void nf_dup_ipv6(struct sk_buff *skb, unsigned int hooknum, +void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum, const struct in6_addr *gw, int oif); #endif /* _NF_DUP_IPV6_H_ */ diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c index 2d79e6e8d934..ce2a59e5c665 100644 --- a/net/ipv4/netfilter/nf_dup_ipv4.c +++ b/net/ipv4/netfilter/nf_dup_ipv4.c @@ -23,25 +23,10 @@ #include #endif -static struct net *pick_net(struct sk_buff *skb) -{ -#ifdef CONFIG_NET_NS - const struct dst_entry *dst; - - if (skb->dev != NULL) - return dev_net(skb->dev); - dst = skb_dst(skb); - if (dst != NULL && dst->dev != NULL) - return dev_net(dst->dev); -#endif - return &init_net; -} - -static bool nf_dup_ipv4_route(struct sk_buff *skb, const struct in_addr *gw, - int oif) +static bool nf_dup_ipv4_route(struct net *net, struct sk_buff *skb, + const struct in_addr *gw, int oif) { const struct iphdr *iph = ip_hdr(skb); - struct net *net = pick_net(skb); struct rtable *rt; struct flowi4 fl4; @@ -65,7 +50,7 @@ static bool nf_dup_ipv4_route(struct sk_buff *skb, const struct in_addr *gw, return true; } -void nf_dup_ipv4(struct sk_buff *skb, unsigned int hooknum, +void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, const struct in_addr *gw, int oif) { struct iphdr *iph; @@ -105,7 +90,7 @@ void nf_dup_ipv4(struct sk_buff *skb, unsigned int hooknum, --iph->ttl; ip_send_check(iph); - if (nf_dup_ipv4_route(skb, gw, oif)) { + if (nf_dup_ipv4_route(net, skb, gw, oif)) { __this_cpu_write(nf_skb_duplicated, true); ip_local_out(skb); __this_cpu_write(nf_skb_duplicated, false); diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c index 30bcf820e8bd..bf855e64fc45 100644 --- a/net/ipv4/netfilter/nft_dup_ipv4.c +++ b/net/ipv4/netfilter/nft_dup_ipv4.c @@ -30,7 +30,7 @@ static void nft_dup_ipv4_eval(const struct nft_expr *expr, }; int oif = regs->data[priv->sreg_dev]; - nf_dup_ipv4(pkt->skb, pkt->hook, &gw, oif); + nf_dup_ipv4(pkt->net, pkt->skb, pkt->hook, &gw, oif); } static int nft_dup_ipv4_init(const struct nft_ctx *ctx, diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c index c8ab626556a0..ee0d9a5b16c3 100644 --- a/net/ipv6/netfilter/nf_dup_ipv6.c +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -19,25 +19,10 @@ #include #endif -static struct net *pick_net(struct sk_buff *skb) -{ -#ifdef CONFIG_NET_NS - const struct dst_entry *dst; - - if (skb->dev != NULL) - return dev_net(skb->dev); - dst = skb_dst(skb); - if (dst != NULL && dst->dev != NULL) - return dev_net(dst->dev); -#endif - return &init_net; -} - -static bool nf_dup_ipv6_route(struct sk_buff *skb, const struct in6_addr *gw, - int oif) +static bool nf_dup_ipv6_route(struct net *net, struct sk_buff *skb, + const struct in6_addr *gw, int oif) { const struct ipv6hdr *iph = ipv6_hdr(skb); - struct net *net = pick_net(skb); struct dst_entry *dst; struct flowi6 fl6; @@ -61,7 +46,7 @@ static bool nf_dup_ipv6_route(struct sk_buff *skb, const struct in6_addr *gw, return true; } -void nf_dup_ipv6(struct sk_buff *skb, unsigned int hooknum, +void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum, const struct in6_addr *gw, int oif) { if (this_cpu_read(nf_skb_duplicated)) @@ -81,7 +66,7 @@ void nf_dup_ipv6(struct sk_buff *skb, unsigned int hooknum, struct ipv6hdr *iph = ipv6_hdr(skb); --iph->hop_limit; } - if (nf_dup_ipv6_route(skb, gw, oif)) { + if (nf_dup_ipv6_route(net, skb, gw, oif)) { __this_cpu_write(nf_skb_duplicated, true); ip6_local_out(skb); __this_cpu_write(nf_skb_duplicated, false); diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c index c81204faf15d..8bfd470cbe72 100644 --- a/net/ipv6/netfilter/nft_dup_ipv6.c +++ b/net/ipv6/netfilter/nft_dup_ipv6.c @@ -28,7 +28,7 @@ static void nft_dup_ipv6_eval(const struct nft_expr *expr, struct in6_addr *gw = (struct in6_addr *)®s->data[priv->sreg_addr]; int oif = regs->data[priv->sreg_dev]; - nf_dup_ipv6(pkt->skb, pkt->hook, gw, oif); + nf_dup_ipv6(pkt->net, pkt->skb, pkt->hook, gw, oif); } static int nft_dup_ipv6_init(const struct nft_ctx *ctx, diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index fd980aa7715d..899b06115fc5 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -32,7 +32,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tee_tginfo *info = par->targinfo; - nf_dup_ipv4(skb, par->hooknum, &info->gw.in, info->priv->oif); + nf_dup_ipv4(par->net, skb, par->hooknum, &info->gw.in, info->priv->oif); return XT_CONTINUE; } @@ -43,7 +43,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tee_tginfo *info = par->targinfo; - nf_dup_ipv6(skb, par->hooknum, &info->gw.in6, info->priv->oif); + nf_dup_ipv6(par->net, skb, par->hooknum, &info->gw.in6, info->priv->oif); return XT_CONTINUE; } From a4ffe319ae72034e3f3332698a2fd83b6f063b18 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:33:03 -0500 Subject: [PATCH 25/30] act_connmark: Remember the struct net instead of guessing it. Stop guessing the struct net instead of remember it. Guessing is just silly and will be problematic in the future when I implement routes between network namespaces. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- include/net/tc_act/tc_connmark.h | 1 + net/sched/act_connmark.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/net/tc_act/tc_connmark.h b/include/net/tc_act/tc_connmark.h index 5c1104c2e24f..02caa406611b 100644 --- a/include/net/tc_act/tc_connmark.h +++ b/include/net/tc_act/tc_connmark.h @@ -5,6 +5,7 @@ struct tcf_connmark_info { struct tcf_common common; + struct net *net; u16 zone; }; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 5019a47b9270..413ac39147d8 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -74,7 +74,7 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, zone.id = ca->zone; zone.dir = NF_CT_DEFAULT_ZONE_DIR; - thash = nf_conntrack_find_get(dev_net(skb->dev), &zone, &tuple); + thash = nf_conntrack_find_get(ca->net, &zone, &tuple); if (!thash) goto out; @@ -119,6 +119,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, ci = to_connmark(a); ci->tcf_action = parm->action; + ci->net = net; ci->zone = parm->zone; tcf_hash_insert(a); From a31f1adc0948930fba9ab5a111ccd735a5d864c6 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:33:04 -0500 Subject: [PATCH 26/30] netfilter: nf_conntrack: Add a struct net parameter to l4_pkt_to_tuple As gre does not have the srckey in the packet gre_pkt_to_tuple needs to perform a lookup in it's per network namespace tables. Pass in the proper network namespace to all pkt_to_tuple implementations to ensure gre (and any similar protocols) can get this right. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 3 ++- include/net/netfilter/nf_conntrack_core.h | 1 + include/net/netfilter/nf_conntrack_l4proto.h | 2 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 4 ++-- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 3 ++- net/netfilter/nf_conntrack_core.c | 10 ++++++---- net/netfilter/nf_conntrack_proto_dccp.c | 2 +- net/netfilter/nf_conntrack_proto_generic.c | 2 +- net/netfilter/nf_conntrack_proto_gre.c | 3 +-- net/netfilter/nf_conntrack_proto_sctp.c | 2 +- net/netfilter/nf_conntrack_proto_tcp.c | 2 +- net/netfilter/nf_conntrack_proto_udp.c | 1 + net/netfilter/nf_conntrack_proto_udplite.c | 1 + net/netfilter/xt_connlimit.c | 2 +- net/openvswitch/conntrack.c | 2 +- net/sched/act_connmark.c | 2 +- 16 files changed, 24 insertions(+), 18 deletions(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index e8ad46834df8..d642f68a7c73 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -191,7 +191,8 @@ int nf_conntrack_hash_check_insert(struct nf_conn *ct); bool nf_ct_delete(struct nf_conn *ct, u32 pid, int report); bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, - u_int16_t l3num, struct nf_conntrack_tuple *tuple); + u_int16_t l3num, struct net *net, + struct nf_conntrack_tuple *tuple); bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_tuple *orig); diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index c03f9c42b3cd..788ef58a66b9 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -41,6 +41,7 @@ void nf_conntrack_cleanup_end(void); bool nf_ct_get_tuple(const struct sk_buff *skb, unsigned int nhoff, unsigned int dataoff, u_int16_t l3num, u_int8_t protonum, + struct net *net, struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l3proto *l3proto, const struct nf_conntrack_l4proto *l4proto); diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 1f7061313d54..956d8a6ac069 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -26,7 +26,7 @@ struct nf_conntrack_l4proto { /* Try to fill in the third arg: dataoff is offset past network protocol hdr. Return true if possible. */ bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conntrack_tuple *tuple); + struct net *net, struct nf_conntrack_tuple *tuple); /* Invert the per-proto part of the tuple: ie. turn xmit into reply. * Some packets can't be inverted: return 0 in that case. diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index cdde3ec496e9..c567e1b5d799 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -30,7 +30,7 @@ static inline struct nf_icmp_net *icmp_pernet(struct net *net) } static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conntrack_tuple *tuple) + struct net *net, struct nf_conntrack_tuple *tuple) { const struct icmphdr *hp; struct icmphdr _hdr; @@ -144,7 +144,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb) + ip_hdrlen(skb) + sizeof(struct icmphdr), - PF_INET, &origtuple)) { + PF_INET, net, &origtuple)) { pr_debug("icmp_error_message: failed to get tuple\n"); return -NF_ACCEPT; } diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 0e6fae103d33..d3b797446cea 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -36,6 +36,7 @@ static inline struct nf_icmp_net *icmpv6_pernet(struct net *net) static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, + struct net *net, struct nf_conntrack_tuple *tuple) { const struct icmp6hdr *hp; @@ -159,7 +160,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, skb_network_offset(skb) + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr), - PF_INET6, &origtuple)) { + PF_INET6, net, &origtuple)) { pr_debug("icmpv6_error: Can't get tuple\n"); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index c09d6c7198f6..09d1d19b2ab9 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -168,6 +168,7 @@ nf_ct_get_tuple(const struct sk_buff *skb, unsigned int dataoff, u_int16_t l3num, u_int8_t protonum, + struct net *net, struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l3proto *l3proto, const struct nf_conntrack_l4proto *l4proto) @@ -181,12 +182,13 @@ nf_ct_get_tuple(const struct sk_buff *skb, tuple->dst.protonum = protonum; tuple->dst.dir = IP_CT_DIR_ORIGINAL; - return l4proto->pkt_to_tuple(skb, dataoff, tuple); + return l4proto->pkt_to_tuple(skb, dataoff, net, tuple); } EXPORT_SYMBOL_GPL(nf_ct_get_tuple); bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, - u_int16_t l3num, struct nf_conntrack_tuple *tuple) + u_int16_t l3num, + struct net *net, struct nf_conntrack_tuple *tuple) { struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; @@ -205,7 +207,7 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, l4proto = __nf_ct_l4proto_find(l3num, protonum); - ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, tuple, + ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple, l3proto, l4proto); rcu_read_unlock(); @@ -1029,7 +1031,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, u32 hash; if (!nf_ct_get_tuple(skb, skb_network_offset(skb), - dataoff, l3num, protonum, &tuple, l3proto, + dataoff, l3num, protonum, net, &tuple, l3proto, l4proto)) { pr_debug("resolve_normal_ct: Can't get tuple\n"); return NULL; diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 6dd995c7c72b..fce1b1cca32d 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -398,7 +398,7 @@ static inline struct dccp_net *dccp_pernet(struct net *net) } static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conntrack_tuple *tuple) + struct net *net, struct nf_conntrack_tuple *tuple) { struct dccp_hdr _hdr, *dh; diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 2281be419a74..86dc752e5349 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -45,7 +45,7 @@ static inline struct nf_generic_net *generic_pernet(struct net *net) static bool generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conntrack_tuple *tuple) + struct net *net, struct nf_conntrack_tuple *tuple) { tuple->src.u.all = 0; tuple->dst.u.all = 0; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 7648674f29c3..a96451a7af20 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -190,9 +190,8 @@ static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple, /* gre hdr info to tuple */ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conntrack_tuple *tuple) + struct net *net, struct nf_conntrack_tuple *tuple) { - struct net *net = dev_net(skb->dev ? skb->dev : skb_dst(skb)->dev); const struct gre_hdr_pptp *pgrehdr; struct gre_hdr_pptp _pgrehdr; __be16 srckey; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 67197731eb68..9578a7c371ef 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -156,7 +156,7 @@ static inline struct sctp_net *sctp_pernet(struct net *net) } static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conntrack_tuple *tuple) + struct net *net, struct nf_conntrack_tuple *tuple) { const struct sctphdr *hp; struct sctphdr _hdr; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 70383de72054..278f3b9356ef 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -277,7 +277,7 @@ static inline struct nf_tcp_net *tcp_pernet(struct net *net) } static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conntrack_tuple *tuple) + struct net *net, struct nf_conntrack_tuple *tuple) { const struct tcphdr *hp; struct tcphdr _hdr; diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 6957281ffee5..478f92f834b6 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -38,6 +38,7 @@ static inline struct nf_udp_net *udp_pernet(struct net *net) static bool udp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, + struct net *net, struct nf_conntrack_tuple *tuple) { const struct udphdr *hp; diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index c5903d1649f9..1ac8ee13a873 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -48,6 +48,7 @@ static inline struct udplite_net *udplite_pernet(struct net *net) static bool udplite_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, + struct net *net, struct nf_conntrack_tuple *tuple) { const struct udphdr *hp; diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 213db252e5be..99bbc829868d 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -332,7 +332,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; zone = nf_ct_zone(ct); } else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), - par->family, &tuple)) { + par->family, net, &tuple)) { goto hotdrop; } diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index e8e524ad8a01..aaf5cbd6d9ae 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -345,7 +345,7 @@ ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone, { struct nf_conntrack_tuple tuple; - if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, &tuple)) + if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple)) return NULL; return __nf_ct_expect_find(net, zone, &tuple); } diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 413ac39147d8..bb41699c6c49 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -68,7 +68,7 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, } if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), - proto, &tuple)) + proto, ca->net, &tuple)) goto out; zone.id = ca->zone; From 176971b33859135d8dbda9b79e16cb1cf615eb92 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:33:05 -0500 Subject: [PATCH 27/30] ipvs: Read hooknum from state rather than ops->hooknum This should be more cache efficient as state is more likely to be in core, and the netfilter core will stop passing in ops soon. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_core.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 453972c6909e..40e3c85f83b5 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1314,7 +1314,7 @@ static unsigned int ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_out(ops->hooknum, skb, AF_INET); + return ip_vs_out(state->hook, skb, AF_INET); } /* @@ -1325,7 +1325,7 @@ static unsigned int ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_out(ops->hooknum, skb, AF_INET); + return ip_vs_out(state->hook, skb, AF_INET); } #ifdef CONFIG_IP_VS_IPV6 @@ -1339,7 +1339,7 @@ static unsigned int ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_out(ops->hooknum, skb, AF_INET6); + return ip_vs_out(state->hook, skb, AF_INET6); } /* @@ -1350,7 +1350,7 @@ static unsigned int ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_out(ops->hooknum, skb, AF_INET6); + return ip_vs_out(state->hook, skb, AF_INET6); } #endif @@ -1850,7 +1850,7 @@ static unsigned int ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_in(ops->hooknum, skb, AF_INET); + return ip_vs_in(state->hook, skb, AF_INET); } /* @@ -1861,7 +1861,7 @@ static unsigned int ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_in(ops->hooknum, skb, AF_INET); + return ip_vs_in(state->hook, skb, AF_INET); } #ifdef CONFIG_IP_VS_IPV6 @@ -1874,7 +1874,7 @@ static unsigned int ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_in(ops->hooknum, skb, AF_INET6); + return ip_vs_in(state->hook, skb, AF_INET6); } /* @@ -1885,7 +1885,7 @@ static unsigned int ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_in(ops->hooknum, skb, AF_INET6); + return ip_vs_in(state->hook, skb, AF_INET6); } #endif From 06198b34a3e09e06d9aecaa3727e0d37206cea77 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:33:06 -0500 Subject: [PATCH 28/30] netfilter: Pass priv instead of nf_hook_ops to netfilter hooks Only pass the void *priv parameter out of the nf_hook_ops. That is all any of the functions are interested now, and by limiting what is passed it becomes simpler to change implementation details. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 2 +- include/net/netfilter/br_netfilter.h | 2 +- include/net/netfilter/nf_nat_l3proto.h | 32 +++++++++---------- include/net/netfilter/nf_tables.h | 3 +- net/bridge/br_netfilter_hooks.c | 14 ++++---- net/bridge/br_netfilter_ipv6.c | 2 +- net/bridge/netfilter/ebtable_filter.c | 4 +-- net/bridge/netfilter/ebtable_nat.c | 4 +-- net/bridge/netfilter/nf_tables_bridge.c | 4 +-- net/decnet/netfilter/dn_rtmsg.c | 2 +- net/ipv4/netfilter/arptable_filter.c | 2 +- net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +- net/ipv4/netfilter/ipt_SYNPROXY.c | 2 +- net/ipv4/netfilter/iptable_filter.c | 2 +- net/ipv4/netfilter/iptable_mangle.c | 2 +- net/ipv4/netfilter/iptable_nat.c | 18 +++++------ net/ipv4/netfilter/iptable_raw.c | 2 +- net/ipv4/netfilter/iptable_security.c | 2 +- .../netfilter/nf_conntrack_l3proto_ipv4.c | 8 ++--- net/ipv4/netfilter/nf_defrag_ipv4.c | 2 +- net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 24 +++++++------- net/ipv4/netfilter/nf_tables_arp.c | 4 +-- net/ipv4/netfilter/nf_tables_ipv4.c | 8 ++--- net/ipv4/netfilter/nft_chain_nat_ipv4.c | 20 ++++++------ net/ipv4/netfilter/nft_chain_route_ipv4.c | 4 +-- net/ipv6/netfilter/ip6t_SYNPROXY.c | 2 +- net/ipv6/netfilter/ip6table_filter.c | 2 +- net/ipv6/netfilter/ip6table_mangle.c | 2 +- net/ipv6/netfilter/ip6table_nat.c | 18 +++++------ net/ipv6/netfilter/ip6table_raw.c | 2 +- net/ipv6/netfilter/ip6table_security.c | 2 +- .../netfilter/nf_conntrack_l3proto_ipv6.c | 8 ++--- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 2 +- net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 24 +++++++------- net/ipv6/netfilter/nf_tables_ipv6.c | 8 ++--- net/ipv6/netfilter/nft_chain_nat_ipv6.c | 20 ++++++------ net/ipv6/netfilter/nft_chain_route_ipv6.c | 4 +-- net/netfilter/core.c | 2 +- net/netfilter/ipvs/ip_vs_core.c | 24 +++++++------- net/netfilter/nf_tables_core.c | 4 +-- net/netfilter/nf_tables_netdev.c | 4 +-- security/selinux/hooks.c | 10 +++--- security/smack/smack_netfilter.c | 4 +-- 43 files changed, 156 insertions(+), 157 deletions(-) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 0b4d4560f33d..987c74cd523c 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -80,7 +80,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, p->okfn = okfn; } -typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops, +typedef unsigned int nf_hookfn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h index 8fe266504900..c93c75fa41ad 100644 --- a/include/net/netfilter/br_netfilter.h +++ b/include/net/netfilter/br_netfilter.h @@ -46,7 +46,7 @@ void br_netfilter_enable(void); #if IS_ENABLED(CONFIG_IPV6) int br_validate_ipv6(struct sk_buff *skb); -unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, +unsigned int br_nf_pre_routing_ipv6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); #else diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h index a3127325f624..aef3e5fc9fd9 100644 --- a/include/net/netfilter/nf_nat_l3proto.h +++ b/include/net/netfilter/nf_nat_l3proto.h @@ -43,31 +43,31 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum); -unsigned int nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, +unsigned int nf_nat_ipv4_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)); -unsigned int nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, +unsigned int nf_nat_ipv4_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)); -unsigned int nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, +unsigned int nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)); -unsigned int nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, +unsigned int nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)); @@ -76,31 +76,31 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, unsigned int hdrlen); -unsigned int nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb, +unsigned int nf_nat_ipv6_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)); -unsigned int nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, +unsigned int nf_nat_ipv6_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)); -unsigned int nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, +unsigned int nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)); -unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, +unsigned int nf_nat_ipv6_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 42e239e55aa3..c9149cc0a02d 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -816,8 +816,7 @@ int nft_register_basechain(struct nft_base_chain *basechain, void nft_unregister_basechain(struct nft_base_chain *basechain, unsigned int hook_nops); -unsigned int nft_do_chain(struct nft_pktinfo *pkt, - const struct nf_hook_ops *ops); +unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); /** * struct nft_table - nf_tables table diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index e6e76bbdc82f..e21e44c13e07 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -464,7 +464,7 @@ struct net_device *setup_pre_routing(struct sk_buff *skb) * receiving device) to make netfilter happy, the REDIRECT * target in particular. Save the original destination IP * address to be able to detect DNAT afterwards. */ -static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, +static unsigned int br_nf_pre_routing(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -486,7 +486,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, return NF_ACCEPT; nf_bridge_pull_encap_header_rcsum(skb); - return br_nf_pre_routing_ipv6(ops, skb, state); + return br_nf_pre_routing_ipv6(priv, skb, state); } if (!brnf_call_iptables && !br->nf_call_iptables) @@ -526,7 +526,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, * took place when the packet entered the bridge), but we * register an IPv4 PRE_ROUTING 'sabotage' hook that will * prevent this from happening. */ -static unsigned int br_nf_local_in(const struct nf_hook_ops *ops, +static unsigned int br_nf_local_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -570,7 +570,7 @@ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff * but we are still able to filter on the 'real' indev/outdev * because of the physdev module. For ARP, indev and outdev are the * bridge ports. */ -static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, +static unsigned int br_nf_forward_ip(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -633,7 +633,7 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, return NF_STOLEN; } -static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, +static unsigned int br_nf_forward_arp(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -801,7 +801,7 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff } /* PF_BRIDGE/POST_ROUTING ********************************************/ -static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, +static unsigned int br_nf_post_routing(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -850,7 +850,7 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, /* IP/SABOTAGE *****************************************************/ /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING * for the second time. */ -static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, +static unsigned int ip_sabotage_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index e4dbbe44c724..c51cc3fd50d9 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -218,7 +218,7 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc /* Replicate the checks that IPv6 does on packet reception and pass the packet * to ip6tables. */ -unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, +unsigned int br_nf_pre_routing_ipv6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 118ce40ac181..f9242dffa65e 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -57,14 +57,14 @@ static const struct ebt_table frame_filter = { }; static unsigned int -ebt_in_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +ebt_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ebt_do_table(skb, state, state->net->xt.frame_filter); } static unsigned int -ebt_out_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +ebt_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ebt_do_table(skb, state, state->net->xt.frame_filter); diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 56c3329d6c37..4bbefe03ab58 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -57,14 +57,14 @@ static struct ebt_table frame_nat = { }; static unsigned int -ebt_nat_in(const struct nf_hook_ops *ops, struct sk_buff *skb, +ebt_nat_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ebt_do_table(skb, state, state->net->xt.frame_nat); } static unsigned int -ebt_nat_out(const struct nf_hook_ops *ops, struct sk_buff *skb, +ebt_nat_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ebt_do_table(skb, state, state->net->xt.frame_nat); diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index 318d825e4207..62f6b1b19589 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -87,7 +87,7 @@ static inline void nft_bridge_set_pktinfo_ipv6(struct nft_pktinfo *pkt, } static unsigned int -nft_do_chain_bridge(const struct nf_hook_ops *ops, +nft_do_chain_bridge(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -105,7 +105,7 @@ nft_do_chain_bridge(const struct nf_hook_ops *ops, break; } - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } static struct nft_af_info nft_af_bridge __read_mostly = { diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index af34fc9bdf69..85f2fdc360c2 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -87,7 +87,7 @@ static void dnrmg_send_peer(struct sk_buff *skb) } -static unsigned int dnrmg_hook(const struct nf_hook_ops *ops, +static unsigned int dnrmg_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 1352e12d4068..1897ee160920 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -27,7 +27,7 @@ static const struct xt_table packet_filter = { /* The work comes in here from netfilter.c */ static unsigned int -arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +arptable_filter_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return arpt_do_table(skb, state, state->net->ipv4.arptable_filter); diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 69157d8eba95..3f32c03e8b2e 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -507,7 +507,7 @@ static void arp_print(struct arp_payload *payload) #endif static unsigned int -arp_mangle(const struct nf_hook_ops *ops, +arp_mangle(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index dfab314981e9..d7021f28c3f0 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -299,7 +299,7 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } -static unsigned int ipv4_synproxy_hook(const struct nf_hook_ops *ops, +static unsigned int ipv4_synproxy_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *nhs) { diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 02d4c5395d6e..397ef2dd133e 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -33,7 +33,7 @@ static const struct xt_table packet_filter = { }; static unsigned int -iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +iptable_filter_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { if (state->hook == NF_INET_LOCAL_OUT && diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index dc2ff6884999..2d6fc911866f 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -78,7 +78,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) /* The work comes in here from netfilter.c. */ static unsigned int -iptable_mangle_hook(const struct nf_hook_ops *ops, +iptable_mangle_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 8ff63ac1f0d6..3a2e4d830a0b 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -28,7 +28,7 @@ static const struct xt_table nf_nat_ipv4_table = { .af = NFPROTO_IPV4, }; -static unsigned int iptable_nat_do_chain(const struct nf_hook_ops *ops, +static unsigned int iptable_nat_do_chain(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct) @@ -36,32 +36,32 @@ static unsigned int iptable_nat_do_chain(const struct nf_hook_ops *ops, return ipt_do_table(skb, state, state->net->ipv4.nat_table); } -static unsigned int iptable_nat_ipv4_fn(const struct nf_hook_ops *ops, +static unsigned int iptable_nat_ipv4_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_fn(ops, skb, state, iptable_nat_do_chain); + return nf_nat_ipv4_fn(priv, skb, state, iptable_nat_do_chain); } -static unsigned int iptable_nat_ipv4_in(const struct nf_hook_ops *ops, +static unsigned int iptable_nat_ipv4_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_in(ops, skb, state, iptable_nat_do_chain); + return nf_nat_ipv4_in(priv, skb, state, iptable_nat_do_chain); } -static unsigned int iptable_nat_ipv4_out(const struct nf_hook_ops *ops, +static unsigned int iptable_nat_ipv4_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_out(ops, skb, state, iptable_nat_do_chain); + return nf_nat_ipv4_out(priv, skb, state, iptable_nat_do_chain); } -static unsigned int iptable_nat_ipv4_local_fn(const struct nf_hook_ops *ops, +static unsigned int iptable_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_local_fn(ops, skb, state, iptable_nat_do_chain); + return nf_nat_ipv4_local_fn(priv, skb, state, iptable_nat_do_chain); } static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index bbb0523d87de..1ba02811acb0 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -20,7 +20,7 @@ static const struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int -iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +iptable_raw_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { if (state->hook == NF_INET_LOCAL_OUT && diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index b92417038705..f534e2f05bad 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -37,7 +37,7 @@ static const struct xt_table security_table = { }; static unsigned int -iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +iptable_security_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { if (state->hook == NF_INET_LOCAL_OUT && diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 15749cc5cf2b..752fb40adcf8 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -92,7 +92,7 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, return NF_ACCEPT; } -static unsigned int ipv4_helper(const struct nf_hook_ops *ops, +static unsigned int ipv4_helper(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -119,7 +119,7 @@ static unsigned int ipv4_helper(const struct nf_hook_ops *ops, ct, ctinfo); } -static unsigned int ipv4_confirm(const struct nf_hook_ops *ops, +static unsigned int ipv4_confirm(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -143,14 +143,14 @@ out: return nf_conntrack_confirm(skb); } -static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops, +static unsigned int ipv4_conntrack_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return nf_conntrack_in(state->net, PF_INET, state->hook, skb); } -static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops, +static unsigned int ipv4_conntrack_local(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 8aea536d2e83..b246346ee849 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -61,7 +61,7 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, return IP_DEFRAG_CONNTRACK_OUT + zone_id; } -static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops, +static unsigned int ipv4_conntrack_defrag(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index 16da45a76dac..8593a9d88619 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -255,9 +255,9 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation); unsigned int -nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -308,7 +308,7 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, if (!nf_nat_initialized(ct, maniptype)) { unsigned int ret; - ret = do_chain(ops, skb, state, ct); + ret = do_chain(priv, skb, state, ct); if (ret != NF_ACCEPT) return ret; @@ -345,9 +345,9 @@ oif_changed: EXPORT_SYMBOL_GPL(nf_nat_ipv4_fn); unsigned int -nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv4_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -355,7 +355,7 @@ nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, unsigned int ret; __be32 daddr = ip_hdr(skb)->daddr; - ret = nf_nat_ipv4_fn(ops, skb, state, do_chain); + ret = nf_nat_ipv4_fn(priv, skb, state, do_chain); if (ret != NF_DROP && ret != NF_STOLEN && daddr != ip_hdr(skb)->daddr) skb_dst_drop(skb); @@ -365,9 +365,9 @@ nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_ipv4_in); unsigned int -nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv4_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -384,7 +384,7 @@ nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_ipv4_fn(ops, skb, state, do_chain); + ret = nf_nat_ipv4_fn(priv, skb, state, do_chain); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && @@ -407,9 +407,9 @@ nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_ipv4_out); unsigned int -nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -424,7 +424,7 @@ nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_ipv4_fn(ops, skb, state, do_chain); + ret = nf_nat_ipv4_fn(priv, skb, state, do_chain); if (ret != NF_DROP && ret != NF_STOLEN && (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index 883bbf83fe09..9d09d4f59545 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -15,7 +15,7 @@ #include static unsigned int -nft_do_chain_arp(const struct nf_hook_ops *ops, +nft_do_chain_arp(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -23,7 +23,7 @@ nft_do_chain_arp(const struct nf_hook_ops *ops, nft_set_pktinfo(&pkt, skb, state); - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } static struct nft_af_info nft_af_arp __read_mostly = { diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index 805be5c9fcc3..ca9dc3c46c4f 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -18,7 +18,7 @@ #include #include -static unsigned int nft_do_chain_ipv4(const struct nf_hook_ops *ops, +static unsigned int nft_do_chain_ipv4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -26,10 +26,10 @@ static unsigned int nft_do_chain_ipv4(const struct nf_hook_ops *ops, nft_set_pktinfo_ipv4(&pkt, skb, state); - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } -static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, +static unsigned int nft_ipv4_output(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -41,7 +41,7 @@ static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, return NF_ACCEPT; } - return nft_do_chain_ipv4(ops, skb, state); + return nft_do_chain_ipv4(priv, skb, state); } struct nft_af_info nft_af_ipv4 __read_mostly = { diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c index c3ffecf28d38..f5c66a7a4bf2 100644 --- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -26,7 +26,7 @@ #include #include -static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops, +static unsigned int nft_nat_do_chain(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct) @@ -35,35 +35,35 @@ static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops, nft_set_pktinfo_ipv4(&pkt, skb, state); - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } -static unsigned int nft_nat_ipv4_fn(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv4_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_fn(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv4_fn(priv, skb, state, nft_nat_do_chain); } -static unsigned int nft_nat_ipv4_in(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv4_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_in(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv4_in(priv, skb, state, nft_nat_do_chain); } -static unsigned int nft_nat_ipv4_out(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv4_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_out(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv4_out(priv, skb, state, nft_nat_do_chain); } -static unsigned int nft_nat_ipv4_local_fn(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_local_fn(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv4_local_fn(priv, skb, state, nft_nat_do_chain); } static const struct nf_chain_type nft_chain_nat_ipv4 = { diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c index 2a1e3d8a3e43..9f486b302108 100644 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -21,7 +21,7 @@ #include #include -static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, +static unsigned int nf_route_table_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -45,7 +45,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, daddr = iph->daddr; tos = iph->tos; - ret = nft_do_chain(&pkt, ops); + ret = nft_do_chain(&pkt, priv); if (ret != NF_DROP && ret != NF_QUEUE) { iph = ip_hdr(skb); diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 41451809b37c..c2356602158a 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -316,7 +316,7 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } -static unsigned int ipv6_synproxy_hook(const struct nf_hook_ops *ops, +static unsigned int ipv6_synproxy_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *nhs) { diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index a7327f61b90c..8b277b983ca5 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -32,7 +32,7 @@ static const struct xt_table packet_filter = { /* The work comes in here from netfilter.c. */ static unsigned int -ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip6table_filter_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ip6t_do_table(skb, state, state->net->ipv6.ip6table_filter); diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index c2e061dcedf3..8745b592b2f6 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -75,7 +75,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) /* The work comes in here from netfilter.c. */ static unsigned int -ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip6table_mangle_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { if (state->hook == NF_INET_LOCAL_OUT) diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index efa6754c4d06..abea175d5853 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -30,7 +30,7 @@ static const struct xt_table nf_nat_ipv6_table = { .af = NFPROTO_IPV6, }; -static unsigned int ip6table_nat_do_chain(const struct nf_hook_ops *ops, +static unsigned int ip6table_nat_do_chain(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct) @@ -38,32 +38,32 @@ static unsigned int ip6table_nat_do_chain(const struct nf_hook_ops *ops, return ip6t_do_table(skb, state, state->net->ipv6.ip6table_nat); } -static unsigned int ip6table_nat_fn(const struct nf_hook_ops *ops, +static unsigned int ip6table_nat_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_fn(ops, skb, state, ip6table_nat_do_chain); + return nf_nat_ipv6_fn(priv, skb, state, ip6table_nat_do_chain); } -static unsigned int ip6table_nat_in(const struct nf_hook_ops *ops, +static unsigned int ip6table_nat_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_in(ops, skb, state, ip6table_nat_do_chain); + return nf_nat_ipv6_in(priv, skb, state, ip6table_nat_do_chain); } -static unsigned int ip6table_nat_out(const struct nf_hook_ops *ops, +static unsigned int ip6table_nat_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_out(ops, skb, state, ip6table_nat_do_chain); + return nf_nat_ipv6_out(priv, skb, state, ip6table_nat_do_chain); } -static unsigned int ip6table_nat_local_fn(const struct nf_hook_ops *ops, +static unsigned int ip6table_nat_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_local_fn(ops, skb, state, ip6table_nat_do_chain); + return nf_nat_ipv6_local_fn(priv, skb, state, ip6table_nat_do_chain); } static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index fac6ad7c0a7c..9021963565c3 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -19,7 +19,7 @@ static const struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int -ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip6table_raw_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ip6t_do_table(skb, state, state->net->ipv6.ip6table_raw); diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 96c94fc240c8..0d856fedfeb0 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -36,7 +36,7 @@ static const struct xt_table security_table = { }; static unsigned int -ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip6table_security_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ip6t_do_table(skb, state, state->net->ipv6.ip6table_security); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 339be1d59afc..dd83ad42f8f6 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -95,7 +95,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, return NF_ACCEPT; } -static unsigned int ipv6_helper(const struct nf_hook_ops *ops, +static unsigned int ipv6_helper(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -131,7 +131,7 @@ static unsigned int ipv6_helper(const struct nf_hook_ops *ops, return helper->help(skb, protoff, ct, ctinfo); } -static unsigned int ipv6_confirm(const struct nf_hook_ops *ops, +static unsigned int ipv6_confirm(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -165,14 +165,14 @@ out: return nf_conntrack_confirm(skb); } -static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops, +static unsigned int ipv6_conntrack_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return nf_conntrack_in(state->net, PF_INET6, state->hook, skb); } -static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops, +static unsigned int ipv6_conntrack_local(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index a9c08520596b..a99baf63eccf 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -51,7 +51,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, return IP6_DEFRAG_CONNTRACK_OUT + zone_id; } -static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, +static unsigned int ipv6_defrag(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 8bc94907dbd9..357f57ba47e4 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -262,9 +262,9 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation); unsigned int -nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv6_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -317,7 +317,7 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, if (!nf_nat_initialized(ct, maniptype)) { unsigned int ret; - ret = do_chain(ops, skb, state, ct); + ret = do_chain(priv, skb, state, ct); if (ret != NF_ACCEPT) return ret; @@ -353,9 +353,9 @@ oif_changed: EXPORT_SYMBOL_GPL(nf_nat_ipv6_fn); unsigned int -nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv6_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -363,7 +363,7 @@ nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb, unsigned int ret; struct in6_addr daddr = ipv6_hdr(skb)->daddr; - ret = nf_nat_ipv6_fn(ops, skb, state, do_chain); + ret = nf_nat_ipv6_fn(priv, skb, state, do_chain); if (ret != NF_DROP && ret != NF_STOLEN && ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) skb_dst_drop(skb); @@ -373,9 +373,9 @@ nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_ipv6_in); unsigned int -nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv6_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -391,7 +391,7 @@ nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, if (skb->len < sizeof(struct ipv6hdr)) return NF_ACCEPT; - ret = nf_nat_ipv6_fn(ops, skb, state, do_chain); + ret = nf_nat_ipv6_fn(priv, skb, state, do_chain); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && @@ -414,9 +414,9 @@ nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_ipv6_out); unsigned int -nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -430,7 +430,7 @@ nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, if (skb->len < sizeof(struct ipv6hdr)) return NF_ACCEPT; - ret = nf_nat_ipv6_fn(ops, skb, state, do_chain); + ret = nf_nat_ipv6_fn(priv, skb, state, do_chain); if (ret != NF_DROP && ret != NF_STOLEN && (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index 41340b794f9b..120ea9131be0 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -16,7 +16,7 @@ #include #include -static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops, +static unsigned int nft_do_chain_ipv6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -26,10 +26,10 @@ static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops, if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0) return NF_DROP; - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } -static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, +static unsigned int nft_ipv6_output(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -40,7 +40,7 @@ static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, return NF_ACCEPT; } - return nft_do_chain_ipv6(ops, skb, state); + return nft_do_chain_ipv6(priv, skb, state); } struct nft_af_info nft_af_ipv6 __read_mostly = { diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c index e96feaefeb14..443cd306c0b0 100644 --- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c @@ -24,7 +24,7 @@ #include #include -static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops, +static unsigned int nft_nat_do_chain(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct) @@ -33,35 +33,35 @@ static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops, nft_set_pktinfo_ipv6(&pkt, skb, state); - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } -static unsigned int nft_nat_ipv6_fn(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv6_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_fn(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv6_fn(priv, skb, state, nft_nat_do_chain); } -static unsigned int nft_nat_ipv6_in(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv6_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_in(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv6_in(priv, skb, state, nft_nat_do_chain); } -static unsigned int nft_nat_ipv6_out(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv6_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_out(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv6_out(priv, skb, state, nft_nat_do_chain); } -static unsigned int nft_nat_ipv6_local_fn(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_local_fn(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv6_local_fn(priv, skb, state, nft_nat_do_chain); } static const struct nf_chain_type nft_chain_nat_ipv6 = { diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index d1bcd2ed7bcc..d42bbc1d7555 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -22,7 +22,7 @@ #include #include -static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, +static unsigned int nf_route_table_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -45,7 +45,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u32 *)ipv6_hdr(skb)); - ret = nft_do_chain(&pkt, ops); + ret = nft_do_chain(&pkt, priv); if (ret != NF_DROP && ret != NF_QUEUE && (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 8e47f8113495..2e907335ee81 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -269,7 +269,7 @@ unsigned int nf_iterate(struct list_head *head, /* Optimization: we don't need to hold module reference here, since function can't sleep. --RR */ repeat: - verdict = (*elemp)->hook(*elemp, skb, state); + verdict = (*elemp)->hook((*elemp)->priv, skb, state); if (verdict != NF_ACCEPT) { #ifdef CONFIG_NETFILTER_DEBUG if (unlikely((verdict & NF_VERDICT_MASK) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 40e3c85f83b5..1fa12edccbcc 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1311,7 +1311,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_reply4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ip_vs_out(state->hook, skb, AF_INET); @@ -1322,7 +1322,7 @@ ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_local_reply4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ip_vs_out(state->hook, skb, AF_INET); @@ -1336,7 +1336,7 @@ ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_reply6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ip_vs_out(state->hook, skb, AF_INET6); @@ -1347,7 +1347,7 @@ ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_local_reply6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ip_vs_out(state->hook, skb, AF_INET6); @@ -1847,7 +1847,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) * Schedule and forward packets from remote clients */ static unsigned int -ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_remote_request4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ip_vs_in(state->hook, skb, AF_INET); @@ -1858,7 +1858,7 @@ ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, * Schedule and forward packets from local clients */ static unsigned int -ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_local_request4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ip_vs_in(state->hook, skb, AF_INET); @@ -1871,7 +1871,7 @@ ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, * Schedule and forward packets from remote clients */ static unsigned int -ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_remote_request6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ip_vs_in(state->hook, skb, AF_INET6); @@ -1882,7 +1882,7 @@ ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, * Schedule and forward packets from local clients */ static unsigned int -ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_local_request6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ip_vs_in(state->hook, skb, AF_INET6); @@ -1901,7 +1901,7 @@ ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, * and send them to ip_vs_in_icmp. */ static unsigned int -ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_forward_icmp(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { int r; @@ -1917,12 +1917,12 @@ ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb, if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - return ip_vs_in_icmp(skb, &r, ops->hooknum); + return ip_vs_in_icmp(skb, &r, state->hook); } #ifdef CONFIG_IP_VS_IPV6 static unsigned int -ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { int r; @@ -1940,7 +1940,7 @@ ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb, if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - return ip_vs_in_icmp_v6(skb, &r, ops->hooknum, &iphdr); + return ip_vs_in_icmp_v6(skb, &r, state->hook, &iphdr); } #endif diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index e5c1f332e45e..f3695a497408 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -109,9 +109,9 @@ struct nft_jumpstack { }; unsigned int -nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) +nft_do_chain(struct nft_pktinfo *pkt, void *priv) { - const struct nft_chain *chain = ops->priv, *basechain = chain; + const struct nft_chain *chain = priv, *basechain = chain; const struct net *net = pkt->net; const struct nft_rule *rule; const struct nft_expr *expr, *last; diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index db416a3396e9..7b9c053ba750 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -89,7 +89,7 @@ static inline void nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt, } static unsigned int -nft_do_chain_netdev(const struct nf_hook_ops *ops, struct sk_buff *skb, +nft_do_chain_netdev(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nft_pktinfo pkt; @@ -106,7 +106,7 @@ nft_do_chain_netdev(const struct nf_hook_ops *ops, struct sk_buff *skb, break; } - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } static struct nft_af_info nft_af_netdev __read_mostly = { diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index e4369d86e588..64340160f4ac 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4866,7 +4866,7 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, return NF_ACCEPT; } -static unsigned int selinux_ipv4_forward(const struct nf_hook_ops *ops, +static unsigned int selinux_ipv4_forward(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -4874,7 +4874,7 @@ static unsigned int selinux_ipv4_forward(const struct nf_hook_ops *ops, } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static unsigned int selinux_ipv6_forward(const struct nf_hook_ops *ops, +static unsigned int selinux_ipv6_forward(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -4924,7 +4924,7 @@ static unsigned int selinux_ip_output(struct sk_buff *skb, return NF_ACCEPT; } -static unsigned int selinux_ipv4_output(const struct nf_hook_ops *ops, +static unsigned int selinux_ipv4_output(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -5099,7 +5099,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, return NF_ACCEPT; } -static unsigned int selinux_ipv4_postroute(const struct nf_hook_ops *ops, +static unsigned int selinux_ipv4_postroute(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -5107,7 +5107,7 @@ static unsigned int selinux_ipv4_postroute(const struct nf_hook_ops *ops, } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static unsigned int selinux_ipv6_postroute(const struct nf_hook_ops *ops, +static unsigned int selinux_ipv6_postroute(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { diff --git a/security/smack/smack_netfilter.c b/security/smack/smack_netfilter.c index a455cfc9ec1f..a9e41da05d28 100644 --- a/security/smack/smack_netfilter.c +++ b/security/smack/smack_netfilter.c @@ -21,7 +21,7 @@ #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static unsigned int smack_ipv6_output(const struct nf_hook_ops *ops, +static unsigned int smack_ipv6_output(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -38,7 +38,7 @@ static unsigned int smack_ipv6_output(const struct nf_hook_ops *ops, } #endif /* IPV6 */ -static unsigned int smack_ipv4_output(const struct nf_hook_ops *ops, +static unsigned int smack_ipv4_output(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { From c7af6483b9f7f3eaba01b2e62d3d8a70cd89bdaf Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:33:07 -0500 Subject: [PATCH 29/30] netfilter: Pass net into nf_xfrm_me_harder Instead of calling dev_net on a likley looking network device pass state->net into nf_xfrm_me_harder. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_nat_core.h | 2 +- net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 4 ++-- net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 4 ++-- net/netfilter/nf_nat_core.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h index fbfd1ba4254e..186c54138f35 100644 --- a/include/net/netfilter/nf_nat_core.h +++ b/include/net/netfilter/nf_nat_core.h @@ -10,7 +10,7 @@ unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, struct sk_buff *skb); -int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family); +int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family); static inline int nf_nat_initialized(struct nf_conn *ct, enum nf_nat_manip_type manip) diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index 8593a9d88619..bc3b9dcbf080 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -396,7 +396,7 @@ nf_nat_ipv4_out(void *priv, struct sk_buff *skb, (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && ct->tuplehash[dir].tuple.src.u.all != ct->tuplehash[!dir].tuple.dst.u.all)) { - err = nf_xfrm_me_harder(skb, AF_INET); + err = nf_xfrm_me_harder(state->net, skb, AF_INET); if (err < 0) ret = NF_DROP_ERR(err); } @@ -440,7 +440,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && ct->tuplehash[dir].tuple.dst.u.all != ct->tuplehash[!dir].tuple.src.u.all) { - err = nf_xfrm_me_harder(skb, AF_INET); + err = nf_xfrm_me_harder(state->net, skb, AF_INET); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 357f57ba47e4..18e835ffbef3 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -403,7 +403,7 @@ nf_nat_ipv6_out(void *priv, struct sk_buff *skb, (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && ct->tuplehash[dir].tuple.src.u.all != ct->tuplehash[!dir].tuple.dst.u.all)) { - err = nf_xfrm_me_harder(skb, AF_INET6); + err = nf_xfrm_me_harder(state->net, skb, AF_INET6); if (err < 0) ret = NF_DROP_ERR(err); } @@ -446,7 +446,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && ct->tuplehash[dir].tuple.dst.u.all != ct->tuplehash[!dir].tuple.src.u.all) { - err = nf_xfrm_me_harder(skb, AF_INET6); + err = nf_xfrm_me_harder(state->net, skb, AF_INET6); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 5113dfd39df9..06a9f45771ab 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -83,7 +83,7 @@ out: rcu_read_unlock(); } -int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family) +int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family) { struct flowi fl; unsigned int hh_len; @@ -99,7 +99,7 @@ int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family) dst = ((struct xfrm_dst *)dst)->route; dst_hold(dst); - dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0); + dst = xfrm_lookup(net, dst, &fl, skb->sk, 0); if (IS_ERR(dst)) return PTR_ERR(dst); From 0a031ac5c00d091ce1f7007f22d5881620bf0a7e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:33:08 -0500 Subject: [PATCH 30/30] netfilter: Use nf_ct_net instead of dev_net(out) in nf_nat_masquerade_ipv6 Use nf_ct_net(ct) instead of guessing that the netdevice out can reliably report the network namespace the conntrack operation is happening in. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/nf_nat_masquerade_ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c index 7745609665cd..31ba7ca19757 100644 --- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c @@ -34,7 +34,7 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY)); - if (ipv6_dev_get_saddr(dev_net(out), out, + if (ipv6_dev_get_saddr(nf_ct_net(ct), out, &ipv6_hdr(skb)->daddr, 0, &src) < 0) return NF_DROP;