From 76c82d7a3d24a4ae1f9b098287c18055546c1a47 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Wed, 4 Dec 2013 09:26:52 -0500 Subject: [PATCH 01/28] net_sched: Fail if missing mandatory action operation methods Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_api.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index fd7072827a40..618695e84190 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -270,6 +270,10 @@ int tcf_register_action(struct tc_action_ops *act) { struct tc_action_ops *a, **ap; + /* Must supply act, dump, cleanup and init */ + if (!act->act || !act->dump || !act->cleanup || !act->init) + return -EINVAL; + write_lock(&act_mod_lock); for (ap = &act_base; (a = *ap) != NULL; ap = &a->next) { if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) { @@ -381,7 +385,7 @@ int tcf_action_exec(struct sk_buff *skb, const struct tc_action *act, } while ((a = act) != NULL) { repeat: - if (a->ops && a->ops->act) { + if (a->ops) { ret = a->ops->act(skb, a, res); if (TC_MUNGED & skb->tc_verd) { /* copied already, allow trampling */ @@ -405,7 +409,7 @@ void tcf_action_destroy(struct tc_action *act, int bind) struct tc_action *a; for (a = act; a; a = act) { - if (a->ops && a->ops->cleanup) { + if (a->ops) { if (a->ops->cleanup(a, bind) == ACT_P_DELETED) module_put(a->ops->owner); act = act->next; @@ -424,7 +428,7 @@ tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { int err = -EINVAL; - if (a->ops == NULL || a->ops->dump == NULL) + if (a->ops == NULL) return err; return a->ops->dump(skb, a, bind, ref); } @@ -436,7 +440,7 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; - if (a->ops == NULL || a->ops->dump == NULL) + if (a->ops == NULL) return err; if (nla_put_string(skb, TCA_KIND, a->ops->kind)) From 63ef6174654a986f263d25e957ef9d1ff243f649 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Wed, 4 Dec 2013 09:26:53 -0500 Subject: [PATCH 02/28] net_sched: Default action lookup method for actions Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_api.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 618695e84190..d1a022e441be 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -274,6 +274,9 @@ int tcf_register_action(struct tc_action_ops *act) if (!act->act || !act->dump || !act->cleanup || !act->init) return -EINVAL; + if (!act->lookup) + act->lookup = tcf_hash_search; + write_lock(&act_mod_lock); for (ap = &act_base; (a = *ap) != NULL; ap = &a->next) { if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) { @@ -727,8 +730,6 @@ tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid) a->ops = tc_lookup_action(tb[TCA_ACT_KIND]); if (a->ops == NULL) goto err_free; - if (a->ops->lookup == NULL) - goto err_mod; err = -ENOENT; if (a->ops->lookup(a, index) == 0) goto err_mod; From 43c00dcf8888daea234226e8adf09c37b00d2245 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Wed, 4 Dec 2013 09:26:54 -0500 Subject: [PATCH 03/28] net_sched: Use default action lookup functions Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_csum.c | 1 - net/sched/act_gact.c | 1 - net/sched/act_ipt.c | 2 -- net/sched/act_mirred.c | 1 - net/sched/act_nat.c | 1 - net/sched/act_pedit.c | 1 - net/sched/act_police.c | 1 - 7 files changed, 8 deletions(-) diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 3a4c0caa1f7d..4225a9382a2b 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -585,7 +585,6 @@ static struct tc_action_ops act_csum_ops = { .act = tcf_csum, .dump = tcf_csum_dump, .cleanup = tcf_csum_cleanup, - .lookup = tcf_hash_search, .init = tcf_csum_init, .walk = tcf_generic_walker }; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index fd2b3cff5fa2..15851da99f3b 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -206,7 +206,6 @@ static struct tc_action_ops act_gact_ops = { .act = tcf_gact, .dump = tcf_gact_dump, .cleanup = tcf_gact_cleanup, - .lookup = tcf_hash_search, .init = tcf_gact_init, .walk = tcf_generic_walker }; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 60d88b6b9560..1d3e19180c2e 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -298,7 +298,6 @@ static struct tc_action_ops act_ipt_ops = { .act = tcf_ipt, .dump = tcf_ipt_dump, .cleanup = tcf_ipt_cleanup, - .lookup = tcf_hash_search, .init = tcf_ipt_init, .walk = tcf_generic_walker }; @@ -312,7 +311,6 @@ static struct tc_action_ops act_xt_ops = { .act = tcf_ipt, .dump = tcf_ipt_dump, .cleanup = tcf_ipt_cleanup, - .lookup = tcf_hash_search, .init = tcf_ipt_init, .walk = tcf_generic_walker }; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 977c10e0631b..6cb16ec3d624 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -271,7 +271,6 @@ static struct tc_action_ops act_mirred_ops = { .act = tcf_mirred, .dump = tcf_mirred_dump, .cleanup = tcf_mirred_cleanup, - .lookup = tcf_hash_search, .init = tcf_mirred_init, .walk = tcf_generic_walker }; diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 876f0ef29694..30c13dedc94d 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -308,7 +308,6 @@ static struct tc_action_ops act_nat_ops = { .act = tcf_nat, .dump = tcf_nat_dump, .cleanup = tcf_nat_cleanup, - .lookup = tcf_hash_search, .init = tcf_nat_init, .walk = tcf_generic_walker }; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 7ed78c9e505c..ab4fc56f8852 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -243,7 +243,6 @@ static struct tc_action_ops act_pedit_ops = { .act = tcf_pedit, .dump = tcf_pedit_dump, .cleanup = tcf_pedit_cleanup, - .lookup = tcf_hash_search, .init = tcf_pedit_init, .walk = tcf_generic_walker }; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 272d8e924cf6..16a62c36928a 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -407,7 +407,6 @@ static struct tc_action_ops act_police_ops = { .act = tcf_act_police, .dump = tcf_act_police_dump, .cleanup = tcf_act_police_cleanup, - .lookup = tcf_hash_search, .init = tcf_act_police_locate, .walk = tcf_act_police_walker }; From 382ca8a1ad8963c7676585f9e25f4c5ff8b28439 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Wed, 4 Dec 2013 09:26:55 -0500 Subject: [PATCH 04/28] net_sched: Provide default walker function for actions Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_api.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index d1a022e441be..69cb848e8345 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -274,8 +274,11 @@ int tcf_register_action(struct tc_action_ops *act) if (!act->act || !act->dump || !act->cleanup || !act->init) return -EINVAL; + /* Supply defaults */ if (!act->lookup) act->lookup = tcf_hash_search; + if (!act->walk) + act->walk = tcf_generic_walker; write_lock(&act_mod_lock); for (ap = &act_base; (a = *ap) != NULL; ap = &a->next) { @@ -1089,12 +1092,6 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) memset(&a, 0, sizeof(struct tc_action)); a.ops = a_o; - if (a_o->walk == NULL) { - WARN(1, "tc_dump_action: %s !capable of dumping table\n", - a_o->kind); - goto out_module_put; - } - nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, sizeof(*t), 0); if (!nlh) From 651a6493ae5c055c78777bb7178c23b5565631da Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Wed, 4 Dec 2013 09:26:56 -0500 Subject: [PATCH 05/28] net_sched: Use default action walker methods Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_csum.c | 1 - net/sched/act_gact.c | 1 - net/sched/act_ipt.c | 2 -- net/sched/act_mirred.c | 1 - net/sched/act_nat.c | 1 - net/sched/act_pedit.c | 1 - net/sched/act_simple.c | 1 - net/sched/act_skbedit.c | 1 - 8 files changed, 9 deletions(-) diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 4225a9382a2b..5c5edf56adbd 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -586,7 +586,6 @@ static struct tc_action_ops act_csum_ops = { .dump = tcf_csum_dump, .cleanup = tcf_csum_cleanup, .init = tcf_csum_init, - .walk = tcf_generic_walker }; MODULE_DESCRIPTION("Checksum updating actions"); diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 15851da99f3b..5645a4d32abd 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -207,7 +207,6 @@ static struct tc_action_ops act_gact_ops = { .dump = tcf_gact_dump, .cleanup = tcf_gact_cleanup, .init = tcf_gact_init, - .walk = tcf_generic_walker }; MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 1d3e19180c2e..882a89762f77 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -299,7 +299,6 @@ static struct tc_action_ops act_ipt_ops = { .dump = tcf_ipt_dump, .cleanup = tcf_ipt_cleanup, .init = tcf_ipt_init, - .walk = tcf_generic_walker }; static struct tc_action_ops act_xt_ops = { @@ -312,7 +311,6 @@ static struct tc_action_ops act_xt_ops = { .dump = tcf_ipt_dump, .cleanup = tcf_ipt_cleanup, .init = tcf_ipt_init, - .walk = tcf_generic_walker }; MODULE_AUTHOR("Jamal Hadi Salim(2002-13)"); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 6cb16ec3d624..252378121ce7 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -272,7 +272,6 @@ static struct tc_action_ops act_mirred_ops = { .dump = tcf_mirred_dump, .cleanup = tcf_mirred_cleanup, .init = tcf_mirred_init, - .walk = tcf_generic_walker }; MODULE_AUTHOR("Jamal Hadi Salim(2002)"); diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 30c13dedc94d..6a15ace00241 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -309,7 +309,6 @@ static struct tc_action_ops act_nat_ops = { .dump = tcf_nat_dump, .cleanup = tcf_nat_cleanup, .init = tcf_nat_init, - .walk = tcf_generic_walker }; MODULE_DESCRIPTION("Stateless NAT actions"); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index ab4fc56f8852..03b67674169c 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -244,7 +244,6 @@ static struct tc_action_ops act_pedit_ops = { .dump = tcf_pedit_dump, .cleanup = tcf_pedit_cleanup, .init = tcf_pedit_init, - .walk = tcf_generic_walker }; MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 7725eb4ab756..31157d3e729c 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -201,7 +201,6 @@ static struct tc_action_ops act_simp_ops = { .dump = tcf_simp_dump, .cleanup = tcf_simp_cleanup, .init = tcf_simp_init, - .walk = tcf_generic_walker, }; MODULE_AUTHOR("Jamal Hadi Salim(2005)"); diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index cb4221171f93..35ea643b4325 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -203,7 +203,6 @@ static struct tc_action_ops act_skbedit_ops = { .dump = tcf_skbedit_dump, .cleanup = tcf_skbedit_cleanup, .init = tcf_skbedit_init, - .walk = tcf_generic_walker, }; MODULE_AUTHOR("Alexander Duyck, "); From df48ada4f82da039e3894f988b5aca08507621ee Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Thu, 5 Dec 2013 16:25:20 -0800 Subject: [PATCH 06/28] MAINTAINERS: Update Intel Wired Ethernet LAN Maintainers Remove Tushar and Peter (PJ) as maintainers, since they have moved out of our group and no longer work on Wired Ethernet. Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- MAINTAINERS | 2 -- 1 file changed, 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 4afcfb4c892b..9641efe66856 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4461,10 +4461,8 @@ M: Bruce Allan M: Carolyn Wyborny M: Don Skidmore M: Greg Rose -M: Peter P Waskiewicz Jr M: Alex Duyck M: John Ronciak -M: Tushar Dave L: e1000-devel@lists.sourceforge.net W: http://www.intel.com/support/feedback.htm W: http://e1000.sourceforge.net/ From 1431fb31ecbaba1b5718006128f0f2ed0b94e1c3 Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Tue, 3 Dec 2013 17:39:29 +0000 Subject: [PATCH 07/28] xen-netback: fix fragment detection in checksum setup The code to detect fragments in checksum_setup() was missing for IPv4 and too eager for IPv6. (It transpires that Windows seems to send IPv6 packets with a fragment header even if they are not a fragment - i.e. offset is zero, and M bit is not set). This patch also incorporates a fix to callers of maybe_pull_tail() where skb->network_header was being erroneously added to the length argument. Signed-off-by: Paul Durrant Signed-off-by: Zoltan Kiss Cc: Wei Liu Cc: Ian Campbell Cc: David Vrabel cc: David Miller Acked-by: Wei Liu Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 228 +++++++++++++++++------------- include/linux/ipv6.h | 1 + include/net/ipv6.h | 3 +- 3 files changed, 136 insertions(+), 96 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 64f0e0d18b81..acf13920e6d1 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1149,49 +1149,72 @@ static int xenvif_set_skb_gso(struct xenvif *vif, return 0; } -static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len) +static inline int maybe_pull_tail(struct sk_buff *skb, unsigned int len, + unsigned int max) { - if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) { - /* If we need to pullup then pullup to the max, so we - * won't need to do it again. - */ - int target = min_t(int, skb->len, MAX_TCP_HEADER); - __pskb_pull_tail(skb, target - skb_headlen(skb)); - } + if (skb_headlen(skb) >= len) + return 0; + + /* If we need to pullup then pullup to the max, so we + * won't need to do it again. + */ + if (max > skb->len) + max = skb->len; + + if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL) + return -ENOMEM; + + if (skb_headlen(skb) < len) + return -EPROTO; + + return 0; } +/* This value should be large enough to cover a tagged ethernet header plus + * maximally sized IP and TCP or UDP headers. + */ +#define MAX_IP_HDR_LEN 128 + static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb, int recalculate_partial_csum) { - struct iphdr *iph = (void *)skb->data; - unsigned int header_size; unsigned int off; - int err = -EPROTO; + bool fragment; + int err; - off = sizeof(struct iphdr); + fragment = false; - header_size = skb->network_header + off + MAX_IPOPTLEN; - maybe_pull_tail(skb, header_size); + err = maybe_pull_tail(skb, + sizeof(struct iphdr), + MAX_IP_HDR_LEN); + if (err < 0) + goto out; - off = iph->ihl * 4; + if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF)) + fragment = true; - switch (iph->protocol) { + off = ip_hdrlen(skb); + + err = -EPROTO; + + switch (ip_hdr(skb)->protocol) { case IPPROTO_TCP: if (!skb_partial_csum_set(skb, off, offsetof(struct tcphdr, check))) goto out; if (recalculate_partial_csum) { - struct tcphdr *tcph = tcp_hdr(skb); + err = maybe_pull_tail(skb, + off + sizeof(struct tcphdr), + MAX_IP_HDR_LEN); + if (err < 0) + goto out; - header_size = skb->network_header + - off + - sizeof(struct tcphdr); - maybe_pull_tail(skb, header_size); - - tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - skb->len - off, - IPPROTO_TCP, 0); + tcp_hdr(skb)->check = + ~csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, + skb->len - off, + IPPROTO_TCP, 0); } break; case IPPROTO_UDP: @@ -1200,24 +1223,20 @@ static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb, goto out; if (recalculate_partial_csum) { - struct udphdr *udph = udp_hdr(skb); + err = maybe_pull_tail(skb, + off + sizeof(struct udphdr), + MAX_IP_HDR_LEN); + if (err < 0) + goto out; - header_size = skb->network_header + - off + - sizeof(struct udphdr); - maybe_pull_tail(skb, header_size); - - udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - skb->len - off, - IPPROTO_UDP, 0); + udp_hdr(skb)->check = + ~csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, + skb->len - off, + IPPROTO_UDP, 0); } break; default: - if (net_ratelimit()) - netdev_err(vif->dev, - "Attempting to checksum a non-TCP/UDP packet, " - "dropping a protocol %d packet\n", - iph->protocol); goto out; } @@ -1227,75 +1246,99 @@ out: return err; } +/* This value should be large enough to cover a tagged ethernet header plus + * an IPv6 header, all options, and a maximal TCP or UDP header. + */ +#define MAX_IPV6_HDR_LEN 256 + +#define OPT_HDR(type, skb, off) \ + (type *)(skb_network_header(skb) + (off)) + static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb, int recalculate_partial_csum) { - int err = -EPROTO; - struct ipv6hdr *ipv6h = (void *)skb->data; + int err; u8 nexthdr; - unsigned int header_size; unsigned int off; + unsigned int len; bool fragment; bool done; + fragment = false; done = false; off = sizeof(struct ipv6hdr); - header_size = skb->network_header + off; - maybe_pull_tail(skb, header_size); + err = maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; - nexthdr = ipv6h->nexthdr; + nexthdr = ipv6_hdr(skb)->nexthdr; - while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) && - !done) { + len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len); + while (off <= len && !done) { switch (nexthdr) { case IPPROTO_DSTOPTS: case IPPROTO_HOPOPTS: case IPPROTO_ROUTING: { - struct ipv6_opt_hdr *hp = (void *)(skb->data + off); + struct ipv6_opt_hdr *hp; - header_size = skb->network_header + - off + - sizeof(struct ipv6_opt_hdr); - maybe_pull_tail(skb, header_size); + err = maybe_pull_tail(skb, + off + + sizeof(struct ipv6_opt_hdr), + MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; + hp = OPT_HDR(struct ipv6_opt_hdr, skb, off); nexthdr = hp->nexthdr; off += ipv6_optlen(hp); break; } case IPPROTO_AH: { - struct ip_auth_hdr *hp = (void *)(skb->data + off); + struct ip_auth_hdr *hp; - header_size = skb->network_header + - off + - sizeof(struct ip_auth_hdr); - maybe_pull_tail(skb, header_size); + err = maybe_pull_tail(skb, + off + + sizeof(struct ip_auth_hdr), + MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; + hp = OPT_HDR(struct ip_auth_hdr, skb, off); nexthdr = hp->nexthdr; - off += (hp->hdrlen+2)<<2; + off += ipv6_authlen(hp); + break; + } + case IPPROTO_FRAGMENT: { + struct frag_hdr *hp; + + err = maybe_pull_tail(skb, + off + + sizeof(struct frag_hdr), + MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; + + hp = OPT_HDR(struct frag_hdr, skb, off); + + if (hp->frag_off & htons(IP6_OFFSET | IP6_MF)) + fragment = true; + + nexthdr = hp->nexthdr; + off += sizeof(struct frag_hdr); break; } - case IPPROTO_FRAGMENT: - fragment = true; - /* fall through */ default: done = true; break; } } - if (!done) { - if (net_ratelimit()) - netdev_err(vif->dev, "Failed to parse packet header\n"); - goto out; - } + err = -EPROTO; - if (fragment) { - if (net_ratelimit()) - netdev_err(vif->dev, "Packet is a fragment!\n"); + if (!done || fragment) goto out; - } switch (nexthdr) { case IPPROTO_TCP: @@ -1304,17 +1347,17 @@ static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb, goto out; if (recalculate_partial_csum) { - struct tcphdr *tcph = tcp_hdr(skb); + err = maybe_pull_tail(skb, + off + sizeof(struct tcphdr), + MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; - header_size = skb->network_header + - off + - sizeof(struct tcphdr); - maybe_pull_tail(skb, header_size); - - tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, - &ipv6h->daddr, - skb->len - off, - IPPROTO_TCP, 0); + tcp_hdr(skb)->check = + ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + skb->len - off, + IPPROTO_TCP, 0); } break; case IPPROTO_UDP: @@ -1323,25 +1366,20 @@ static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb, goto out; if (recalculate_partial_csum) { - struct udphdr *udph = udp_hdr(skb); + err = maybe_pull_tail(skb, + off + sizeof(struct udphdr), + MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; - header_size = skb->network_header + - off + - sizeof(struct udphdr); - maybe_pull_tail(skb, header_size); - - udph->check = ~csum_ipv6_magic(&ipv6h->saddr, - &ipv6h->daddr, - skb->len - off, - IPPROTO_UDP, 0); + udp_hdr(skb)->check = + ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + skb->len - off, + IPPROTO_UDP, 0); } break; default: - if (net_ratelimit()) - netdev_err(vif->dev, - "Attempting to checksum a non-TCP/UDP packet, " - "dropping a protocol %d packet\n", - nexthdr); goto out; } diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 5d89d1b808a6..c56c350324e4 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -4,6 +4,7 @@ #include #define ipv6_optlen(p) (((p)->hdrlen+1) << 3) +#define ipv6_authlen(p) (((p)->hdrlen+2) << 2) /* * This structure contains configuration options per IPv6 link. */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index eb198acaac1d..488316e339a1 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -110,7 +110,8 @@ struct frag_hdr { __be32 identification; }; -#define IP6_MF 0x0001 +#define IP6_MF 0x0001 +#define IP6_OFFSET 0xFFF8 #include From fb6e0883f2e7ee2c2330a12bac3604c79a97f35e Mon Sep 17 00:00:00 2001 From: Jitendra Kalsaria Date: Thu, 5 Dec 2013 18:11:22 -0500 Subject: [PATCH 08/28] qlge: Fix ethtool statistics o Receive mac error stat was getting overwritten by other stats. Signed-off-by: Jitendra Kalsaria Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c b/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c index 0780e039b271..8dee1beb9854 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c @@ -181,6 +181,7 @@ static const char ql_gstrings_test[][ETH_GSTRING_LEN] = { }; #define QLGE_TEST_LEN (sizeof(ql_gstrings_test) / ETH_GSTRING_LEN) #define QLGE_STATS_LEN ARRAY_SIZE(ql_gstrings_stats) +#define QLGE_RCV_MAC_ERR_STATS 7 static int ql_update_ring_coalescing(struct ql_adapter *qdev) { @@ -280,6 +281,9 @@ static void ql_update_stats(struct ql_adapter *qdev) iter++; } + /* Update receive mac error statistics */ + iter += QLGE_RCV_MAC_ERR_STATS; + /* * Get Per-priority TX pause frame counter statistics. */ From 4be1028e9f99011e34dfe405d572592610776c74 Mon Sep 17 00:00:00 2001 From: Jitendra Kalsaria Date: Thu, 5 Dec 2013 18:11:23 -0500 Subject: [PATCH 09/28] qlge: Allow enable/disable rx/tx vlan acceleration independently o Fix the driver to allow user to enable/disable rx/tx vlan acceleration independently. For example: ethtool -K ethX rxvlan on/off ethtool -K ethX txvlan on/off Signed-off-by: Jitendra Kalsaria Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlge/qlge_main.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index a245dc18d769..449f506d2e8f 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -2376,14 +2376,6 @@ static netdev_features_t qlge_fix_features(struct net_device *ndev, netdev_features_t features) { int err; - /* - * Since there is no support for separate rx/tx vlan accel - * enable/disable make sure tx flag is always in same state as rx. - */ - if (features & NETIF_F_HW_VLAN_CTAG_RX) - features |= NETIF_F_HW_VLAN_CTAG_TX; - else - features &= ~NETIF_F_HW_VLAN_CTAG_TX; /* Update the behavior of vlan accel in the adapter */ err = qlge_update_hw_vlan_features(ndev, features); From ace34c921a2d63e05ef4da959588c355b5ea1dfa Mon Sep 17 00:00:00 2001 From: Jitendra Kalsaria Date: Thu, 5 Dec 2013 18:11:24 -0500 Subject: [PATCH 10/28] qlge: Update version to 1.00.00.34 Signed-off-by: Jitendra Kalsaria Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlge/qlge.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h b/drivers/net/ethernet/qlogic/qlge/qlge.h index 0c9c4e895595..03517478e589 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge.h +++ b/drivers/net/ethernet/qlogic/qlge/qlge.h @@ -18,7 +18,7 @@ */ #define DRV_NAME "qlge" #define DRV_STRING "QLogic 10 Gigabit PCI-E Ethernet Driver " -#define DRV_VERSION "1.00.00.33" +#define DRV_VERSION "1.00.00.34" #define WQ_ADDR_ALIGN 0x3 /* 4 byte alignment */ From dd0df47dc3548bf2dfdc7b4d65f49b452a9d9701 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 3 Dec 2013 15:13:02 -0800 Subject: [PATCH 11/28] net: davinci_emac: Fix platform data handling and make usable for am3517 When booted with device tree, we may still have platform data passed as auxdata. For am3517 this is needed for passing the interrupt_enable and interrupt_disable callbacks that access the omap system control module registers. These callback functions will eventually go away when we have a separate system control module driver. Some of the things that are currently passed as platform data we don't need to set up as device tree properties as they are always the same on am3517. So let's use a new compatible flag for those so we can get those from the device tree match data. Also note that we need to fix setting of phy_dev to NULL instead of an empty string as the code later on uses that to find the first phy on the mdio bus. This seems to have been caused by 5d69e0076a72 (net: davinci_emac: switch to new mdio). Signed-off-by: Tony Lindgren Signed-off-by: David S. Miller --- .../devicetree/bindings/net/davinci_emac.txt | 2 +- drivers/net/ethernet/ti/davinci_emac.c | 26 ++++++++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/davinci_emac.txt b/Documentation/devicetree/bindings/net/davinci_emac.txt index 48b259e29e87..bad381faf036 100644 --- a/Documentation/devicetree/bindings/net/davinci_emac.txt +++ b/Documentation/devicetree/bindings/net/davinci_emac.txt @@ -4,7 +4,7 @@ This file provides information, what the device node for the davinci_emac interface contains. Required properties: -- compatible: "ti,davinci-dm6467-emac"; +- compatible: "ti,davinci-dm6467-emac" or "ti,am3517-emac" - reg: Offset and length of the register set for the device - ti,davinci-ctrl-reg-offset: offset to control register - ti,davinci-ctrl-mod-reg-offset: offset to control module register diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 41ba974bf37c..cd9b164a0434 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include @@ -1752,10 +1753,14 @@ static const struct net_device_ops emac_netdev_ops = { #endif }; +static const struct of_device_id davinci_emac_of_match[]; + static struct emac_platform_data * davinci_emac_of_get_pdata(struct platform_device *pdev, struct emac_priv *priv) { struct device_node *np; + const struct of_device_id *match; + const struct emac_platform_data *auxdata; struct emac_platform_data *pdata = NULL; const u8 *mac_addr; @@ -1793,7 +1798,20 @@ davinci_emac_of_get_pdata(struct platform_device *pdev, struct emac_priv *priv) priv->phy_node = of_parse_phandle(np, "phy-handle", 0); if (!priv->phy_node) - pdata->phy_id = ""; + pdata->phy_id = NULL; + + auxdata = pdev->dev.platform_data; + if (auxdata) { + pdata->interrupt_enable = auxdata->interrupt_enable; + pdata->interrupt_disable = auxdata->interrupt_disable; + } + + match = of_match_device(davinci_emac_of_match, &pdev->dev); + if (match && match->data) { + auxdata = match->data; + pdata->version = auxdata->version; + pdata->hw_ram_addr = auxdata->hw_ram_addr; + } pdev->dev.platform_data = pdata; @@ -2020,8 +2038,14 @@ static const struct dev_pm_ops davinci_emac_pm_ops = { }; #if IS_ENABLED(CONFIG_OF) +static const struct emac_platform_data am3517_emac_data = { + .version = EMAC_VERSION_2, + .hw_ram_addr = 0x01e20000, +}; + static const struct of_device_id davinci_emac_of_match[] = { {.compatible = "ti,davinci-dm6467-emac", }, + {.compatible = "ti,am3517-emac", .data = &am3517_emac_data, }, {}, }; MODULE_DEVICE_TABLE(of, davinci_emac_of_match); From 78ac814f120da17053b3d52aa215c7c547c5e77d Mon Sep 17 00:00:00 2001 From: wangweidong Date: Wed, 4 Dec 2013 17:32:39 +0800 Subject: [PATCH 12/28] sctp: disable max_burst when the max_burst is 0 As Michael pointed out that when max_burst is 0, it just disable max_burst. It declared in rfc6458#section-8.1.24. so add the check in sctp_transport_burst_limited, when it 0, just do nothing. Reviewed-by: Daniel Borkmann Suggested-by: Vlad Yasevich Suggested-by: Michael Tuexen Signed-off-by: Wang Weidong Acked-by: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/transport.c b/net/sctp/transport.c index e332efb124cc..efc46ffed1fd 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -573,7 +573,7 @@ void sctp_transport_burst_limited(struct sctp_transport *t) u32 old_cwnd = t->cwnd; u32 max_burst_bytes; - if (t->burst_limited) + if (t->burst_limited || asoc->max_burst == 0) return; max_burst_bytes = t->flight_size + (asoc->max_burst * asoc->pathmtu); From c8781cf4a309ae4d1393f5878d4e51987665898c Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 4 Dec 2013 12:04:54 +0200 Subject: [PATCH 13/28] bnx2x: avoid null pointer dereference when enabling SR-IOV Fixed NULL pointer dereference when dynamically activating SR-IOV after vf database failed to be allocated in probe stage (for example due to no ARI support in pci hub). Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 0216d592d0ce..2e46c28fc601 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -3114,6 +3114,11 @@ int bnx2x_sriov_configure(struct pci_dev *dev, int num_vfs_param) { struct bnx2x *bp = netdev_priv(pci_get_drvdata(dev)); + if (!IS_SRIOV(bp)) { + BNX2X_ERR("failed to configure SR-IOV since vfdb was not allocated. Check dmesg for errors in probe stage\n"); + return -EINVAL; + } + DP(BNX2X_MSG_IOV, "bnx2x_sriov_configure called with %d, BNX2X_NR_VIRTFN(bp) was %d\n", num_vfs_param, BNX2X_NR_VIRTFN(bp)); From 89015c18ff34a39e4679ddd9b058e74d7dde28e7 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Wed, 4 Dec 2013 18:59:31 +0800 Subject: [PATCH 14/28] bonding: add arp_ip_target checks when install the module When I install the bonding with the wrong arp_ip_target, just like arp_ip_target=500.500.500.500, the arp_ip_target was transfored to 245.245.245.244 and stored in the ip target success, it is uncorrect, so I add checks to avoid adding wrong address. The in4_pton() will set wrong ip address to 0.0.0.0 and return 0, also use the micro IS_IP_TARGET_UNUSABLE_ADDRESS to simplify the code. Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 36eab0c4fb33..398e299ee1bd 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4199,9 +4199,9 @@ static int bond_check_params(struct bond_params *params) (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[i]; i++) { /* not complete check, but should be good enough to catch mistakes */ - __be32 ip = in_aton(arp_ip_target[i]); - if (!isdigit(arp_ip_target[i][0]) || ip == 0 || - ip == htonl(INADDR_BROADCAST)) { + __be32 ip; + if (!in4_pton(arp_ip_target[i], -1, (u8 *)&ip, -1, NULL) || + IS_IP_TARGET_UNUSABLE_ADDRESS(ip)) { pr_warning("Warning: bad arp_ip_target module parameter (%s), ARP monitoring will not be performed\n", arp_ip_target[i]); arp_interval = 0; From 86d9be263a7ad6f17214d7f112b4c8739137485d Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 4 Dec 2013 18:06:51 +0100 Subject: [PATCH 15/28] forcedeth: run loopback test only on chipsets that support it The driver incorrectly run loopback test on chips that don't support it. Loopback test is only supported by chips that has DEV_HAS_TEST_EXTENDED flag and returns 4 (NV_TEST_COUNT_EXTENDED) as test count. Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller --- drivers/net/ethernet/nvidia/forcedeth.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 2d045be4b5cf..1e8b9514718b 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -5150,8 +5150,10 @@ static void nv_self_test(struct net_device *dev, struct ethtool_test *test, u64 { struct fe_priv *np = netdev_priv(dev); u8 __iomem *base = get_hwbase(dev); - int result; - memset(buffer, 0, nv_get_sset_count(dev, ETH_SS_TEST)*sizeof(u64)); + int result, count; + + count = nv_get_sset_count(dev, ETH_SS_TEST); + memset(buffer, 0, count * sizeof(u64)); if (!nv_link_test(dev)) { test->flags |= ETH_TEST_FL_FAILED; @@ -5195,7 +5197,7 @@ static void nv_self_test(struct net_device *dev, struct ethtool_test *test, u64 return; } - if (!nv_loopback_test(dev)) { + if (count > NV_TEST_COUNT_BASE && !nv_loopback_test(dev)) { test->flags |= ETH_TEST_FL_FAILED; buffer[3] = 1; } From 7f2cbdc28c034ef2c3be729681f631d5744e3cd5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 4 Dec 2013 20:12:04 -0800 Subject: [PATCH 16/28] tcp_memcontrol: Cleanup/fix cg_proto->memory_pressure handling. kill memcg_tcp_enter_memory_pressure. The only function of memcg_tcp_enter_memory_pressure was to reduce deal with the unnecessary abstraction that was tcp_memcontrol. Now that struct tcp_memcontrol is gone remove this unnecessary function, the unnecessary function pointer, and modify sk_enter_memory_pressure to set this field directly, just as sk_leave_memory_pressure cleas this field directly. This fixes a small bug I intruduced when killing struct tcp_memcontrol that caused memcg_tcp_enter_memory_pressure to never be called and thus failed to ever set cg_proto->memory_pressure. Remove the cg_proto enter_memory_pressure function as it now serves no useful purpose. Don't test cg_proto->memory_presser in sk_leave_memory_pressure before clearing it. The test was originally there to ensure that the pointer was non-NULL. Now that cg_proto is not a pointer the pointer does not matter. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/net/sock.h | 6 ++---- net/ipv4/tcp_memcontrol.c | 7 ------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index e3a18ff0c38b..2ef3c3eca47a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1035,7 +1035,6 @@ enum cg_proto_flags { }; struct cg_proto { - void (*enter_memory_pressure)(struct sock *sk); struct res_counter memory_allocated; /* Current allocated memory. */ struct percpu_counter sockets_allocated; /* Current number of sockets. */ int memory_pressure; @@ -1155,8 +1154,7 @@ static inline void sk_leave_memory_pressure(struct sock *sk) struct proto *prot = sk->sk_prot; for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) - if (cg_proto->memory_pressure) - cg_proto->memory_pressure = 0; + cg_proto->memory_pressure = 0; } } @@ -1171,7 +1169,7 @@ static inline void sk_enter_memory_pressure(struct sock *sk) struct proto *prot = sk->sk_prot; for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) - cg_proto->enter_memory_pressure(sk); + cg_proto->memory_pressure = 1; } sk->sk_prot->enter_memory_pressure(sk); diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 269a89ecd2f4..f7e522c558ba 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -6,13 +6,6 @@ #include #include -static void memcg_tcp_enter_memory_pressure(struct sock *sk) -{ - if (sk->sk_cgrp->memory_pressure) - sk->sk_cgrp->memory_pressure = 1; -} -EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure); - int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { /* From 239c78db9c41a8f524cce60507440d72229d73bc Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 5 Dec 2013 23:29:19 +0100 Subject: [PATCH 17/28] net: clear local_df when passing skb between namespaces We must clear local_df when passing the skb between namespaces as the packet is not local to the new namespace any more and thus may not get fragmented by local rules. Fred Templin noticed that other namespaces do fragment IPv6 packets while forwarding. Instead they should have send back a PTB. The same problem should be present when forwarding DF-IPv4 packets between namespaces. Reported-by: Templin, Fred L Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/core/skbuff.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2718fed53d8c..06e72d3cdf60 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3584,6 +3584,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) skb->tstamp.tv64 = 0; skb->pkt_type = PACKET_HOST; skb->skb_iif = 0; + skb->local_df = 0; skb_dst_drop(skb); skb->mark = 0; secpath_reset(skb); From e6ebc7f16ca1434a334647aa56399c546be4e64b Mon Sep 17 00:00:00 2001 From: Zhi Yong Wu Date: Fri, 6 Dec 2013 14:16:50 +0800 Subject: [PATCH 18/28] macvtap: update file current position Signed-off-by: Zhi Yong Wu Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 9093004f9b63..957cc5c3653d 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -876,6 +876,8 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv, ret = macvtap_do_read(q, iocb, iv, len, file->f_flags & O_NONBLOCK); ret = min_t(ssize_t, ret, len); /* XXX copied from tun.c. Why? */ + if (ret > 0) + iocb->ki_pos = ret; out: return ret; } From d0b7da8afa079ffe018ab3e92879b7138977fc8f Mon Sep 17 00:00:00 2001 From: Zhi Yong Wu Date: Fri, 6 Dec 2013 14:16:51 +0800 Subject: [PATCH 19/28] tun: update file current position Signed-off-by: Zhi Yong Wu Signed-off-by: David S. Miller --- drivers/net/tun.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 782e38bfc1ee..e26cbea1ce68 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1356,6 +1356,8 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, ret = tun_do_read(tun, tfile, iocb, iv, len, file->f_flags & O_NONBLOCK); ret = min_t(ssize_t, ret, len); + if (ret > 0) + iocb->ki_pos = ret; out: tun_put(tun); return ret; From 4bebb56a6d50ca50a34048194877dca34f572f8f Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Thu, 5 Dec 2013 12:07:55 +0530 Subject: [PATCH 20/28] be2net: Fix Lancer error recovery to distinguish FW download The Firmware update would be detected by looking at the sliport_error1/ sliport_error2 register values(0x02/0x00). If its not a FW reset the current messaging would take place. If the error is due to FW reset, log a message to user that "Firmware update in progress" and also do not log sliport_status and sliport_error register values. Signed-off-by: Kalesh AP Signed-off-by: Somnath Kotur Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_hw.h | 3 +++ drivers/net/ethernet/emulex/benet/be_main.c | 18 ++++++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_hw.h b/drivers/net/ethernet/emulex/benet/be_hw.h index 3e2162121601..dc88782185f2 100644 --- a/drivers/net/ethernet/emulex/benet/be_hw.h +++ b/drivers/net/ethernet/emulex/benet/be_hw.h @@ -64,6 +64,9 @@ #define SLIPORT_ERROR_NO_RESOURCE1 0x2 #define SLIPORT_ERROR_NO_RESOURCE2 0x9 +#define SLIPORT_ERROR_FW_RESET1 0x2 +#define SLIPORT_ERROR_FW_RESET2 0x0 + /********* Memory BAR register ************/ #define PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET 0xfc /* Host Interrupt Enable, if set interrupts are enabled although "PCI Interrupt diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index fee64bf10446..a2302dc061f3 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2464,8 +2464,16 @@ void be_detect_error(struct be_adapter *adapter) */ if (sliport_status & SLIPORT_STATUS_ERR_MASK) { adapter->hw_error = true; - dev_err(&adapter->pdev->dev, - "Error detected in the card\n"); + /* Do not log error messages if its a FW reset */ + if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 && + sliport_err2 == SLIPORT_ERROR_FW_RESET2) { + dev_info(&adapter->pdev->dev, + "Firmware update in progress\n"); + return; + } else { + dev_err(&adapter->pdev->dev, + "Error detected in the card\n"); + } } if (sliport_status & SLIPORT_STATUS_ERR_MASK) { @@ -3812,6 +3820,8 @@ static int lancer_fw_download(struct be_adapter *adapter, } if (change_status == LANCER_FW_RESET_NEEDED) { + dev_info(&adapter->pdev->dev, + "Resetting adapter to activate new FW\n"); status = lancer_physdev_ctrl(adapter, PHYSDEV_CONTROL_FW_RESET_MASK); if (status) { @@ -4363,13 +4373,13 @@ static int lancer_recover_func(struct be_adapter *adapter) goto err; } - dev_err(dev, "Error recovery successful\n"); + dev_err(dev, "Adapter recovery successful\n"); return 0; err: if (status == -EAGAIN) dev_err(dev, "Waiting for resource provisioning\n"); else - dev_err(dev, "Error recovery failed\n"); + dev_err(dev, "Adapter recovery failed\n"); return status; } From b05004adf9d34e7fb8e96fe6a7cdeb0843f5ab35 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Thu, 5 Dec 2013 12:08:16 +0530 Subject: [PATCH 21/28] be2net: Free/delete pmacs (in be_clear()) only if they exist During suspend-resume and lancer error recovery we will cleanup and re-initialize the resources through be_clear() and be_setup() respectively. During re-initialisation in be_setup(), if be_get_config() fails, we'll again call be_clear() which will cause a NULL pointer dereference as adapter->pmac_id is already freed. Signed-off-by: Kalesh AP Signed-off-by: Somnath Kotur Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 23 ++++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index a2302dc061f3..0fde69d5cb6a 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2940,28 +2940,35 @@ static void be_cancel_worker(struct be_adapter *adapter) } } -static int be_clear(struct be_adapter *adapter) +static void be_mac_clear(struct be_adapter *adapter) { int i; + if (adapter->pmac_id) { + for (i = 0; i < (adapter->uc_macs + 1); i++) + be_cmd_pmac_del(adapter, adapter->if_handle, + adapter->pmac_id[i], 0); + adapter->uc_macs = 0; + + kfree(adapter->pmac_id); + adapter->pmac_id = NULL; + } +} + +static int be_clear(struct be_adapter *adapter) +{ be_cancel_worker(adapter); if (sriov_enabled(adapter)) be_vf_clear(adapter); /* delete the primary mac along with the uc-mac list */ - for (i = 0; i < (adapter->uc_macs + 1); i++) - be_cmd_pmac_del(adapter, adapter->if_handle, - adapter->pmac_id[i], 0); - adapter->uc_macs = 0; + be_mac_clear(adapter); be_cmd_if_destroy(adapter, adapter->if_handle, 0); be_clear_queues(adapter); - kfree(adapter->pmac_id); - adapter->pmac_id = NULL; - be_msix_disable(adapter); return 0; } From a752a8b94da4865d9c361c16ccf7ccb2994291dd Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 5 Dec 2013 11:36:58 +0100 Subject: [PATCH 22/28] bonding: fix packets_per_slave showing There's an issue when showing the value of packets_per_slave due to using signed integer. The value may be < 0 and thus not put through reciprocal_value() before showing. This patch makes it use unsigned integer when showing it. CC: Andy Gospodarek CC: Jay Vosburgh CC: Veaceslav Falico CC: David S. Miller Signed-off-by: Nikolay Aleksandrov Acked-by: Veaceslav Falico Signed-off-by: David S. Miller --- drivers/net/bonding/bond_sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index abf5e106edc5..0ae580bbc5db 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -1635,12 +1635,12 @@ static ssize_t bonding_show_packets_per_slave(struct device *d, char *buf) { struct bonding *bond = to_bond(d); - int packets_per_slave = bond->params.packets_per_slave; + unsigned int packets_per_slave = bond->params.packets_per_slave; if (packets_per_slave > 1) packets_per_slave = reciprocal_value(packets_per_slave); - return sprintf(buf, "%d\n", packets_per_slave); + return sprintf(buf, "%u\n", packets_per_slave); } static ssize_t bonding_store_packets_per_slave(struct device *d, From fa9fac1725fd38dad414e3729c56c089a7188383 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Thu, 5 Dec 2013 18:36:20 +0400 Subject: [PATCH 23/28] virtio-net: determine type of bufs correctly free_unused_bufs must check vi->mergeable_rx_bufs before vi->big_packets, because we use this sequence in other places. Otherwise we allocate buffer of one type, then free it as another type. general protection fault: 0000 [#1] SMP Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: ip6table_filter ip6_tables iptable_filter ip_tables pcspkr virtio_balloon virtio_net(-) i2c_pii CPU: 0 PID: 400 Comm: rmmod Not tainted 3.13.0-rc2+ #170 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff8800b6d2a210 ti: ffff8800aed32000 task.ti: ffff8800aed32000 RIP: 0010:[] [] free_unused_bufs+0xc3/0x190 [virtio_net] RSP: 0018:ffff8800aed33dd8 EFLAGS: 00010202 RAX: ffff8800b1fe2c00 RBX: ffff8800b66a7240 RCX: 6b6b6b6b6b6b6b6b RDX: 6b6b6b6b6b6b6b6b RSI: ffff8800b8419a68 RDI: ffff8800b66a1148 RBP: ffff8800aed33e00 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 R13: ffff8800b66a1148 R14: 0000000000000000 R15: 000077ff80000000 FS: 00007fc4f9c4e740(0000) GS:ffff8800bfa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007f63f432f000 CR3: 00000000b6538000 CR4: 00000000000006f0 Stack: ffff8800b66a7240 ffff8800b66a7380 ffff8800377bd3f0 0000000000000000 00000000023302f0 ffff8800aed33e18 ffffffffa00346e2 ffff8800b66a7240 ffff8800aed33e38 ffffffffa003474d ffff8800377bd388 ffff8800377bd390 Call Trace: [] remove_vq_common+0x22/0x40 [virtio_net] [] virtnet_remove+0x4d/0x90 [virtio_net] [] virtio_dev_remove+0x23/0x80 [] __device_release_driver+0x7f/0xf0 [] driver_detach+0xc0/0xd0 [] bus_remove_driver+0x58/0xd0 [] driver_unregister+0x2c/0x50 [] unregister_virtio_driver+0xe/0x10 [] virtio_net_driver_exit+0x10/0x7be [virtio_net] [] SyS_delete_module+0x172/0x220 [] ? trace_hardirqs_on+0xd/0x10 [] ? __audit_syscall_entry+0x9c/0xf0 [] system_call_fastpath+0x16/0x1b Code: c0 74 55 0f 1f 44 00 00 80 7b 30 00 74 7a 48 8b 50 30 4c 89 e6 48 03 73 20 48 85 d2 0f 84 bb 00 00 00 66 0f RIP [] free_unused_bufs+0xc3/0x190 [virtio_net] RSP ---[ end trace edb570ea923cce9c ]--- Fixes: 2613af0ed18a (virtio_net: migrate mergeable rx buffers to page frag allocators) Cc: Michael Dalton Cc: Rusty Russell Cc: "Michael S. Tsirkin" Signed-off-by: Andrey Vagin Acked-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 916241d16c67..930039a8d0ea 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1396,10 +1396,10 @@ static void free_unused_bufs(struct virtnet_info *vi) struct virtqueue *vq = vi->rq[i].vq; while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { - if (vi->big_packets) - give_pages(&vi->rq[i], buf); - else if (vi->mergeable_rx_bufs) + if (vi->mergeable_rx_bufs) put_page(virt_to_head_page(buf)); + else if (vi->big_packets) + give_pages(&vi->rq[i], buf); else dev_kfree_skb(buf); --vi->rq[i].num; From d4fb84eefe5164f6a6ea51d0a9e26280c661a0dd Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Thu, 5 Dec 2013 18:36:21 +0400 Subject: [PATCH 24/28] virtio: delete napi structures from netdev before releasing memory free_netdev calls netif_napi_del too, but it's too late, because napi structures are placed on vi->rq. netif_napi_add() is called from virtnet_alloc_queues. general protection fault: 0000 [#1] SMP Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: ip6table_filter ip6_tables iptable_filter ip_tables virtio_balloon pcspkr virtio_net(-) i2c_pii CPU: 1 PID: 347 Comm: rmmod Not tainted 3.13.0-rc2+ #171 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff8800b779c420 ti: ffff8800379e0000 task.ti: ffff8800379e0000 RIP: 0010:[] [] __list_del_entry+0x29/0xd0 RSP: 0018:ffff8800379e1dd0 EFLAGS: 00010a83 RAX: 6b6b6b6b6b6b6b6b RBX: ffff8800379c2fd0 RCX: dead000000200200 RDX: 6b6b6b6b6b6b6b6b RSI: 0000000000000001 RDI: ffff8800379c2fd0 RBP: ffff8800379e1dd0 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: ffff8800379c2f90 R13: ffff880037839160 R14: 0000000000000000 R15: 00000000013352f0 FS: 00007f1400e34740(0000) GS:ffff8800bfb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007f464124c763 CR3: 00000000b68cf000 CR4: 00000000000006e0 Stack: ffff8800379e1df0 ffffffff8155beab 6b6b6b6b6b6b6b2b ffff8800378391c0 ffff8800379e1e18 ffffffff8156499b ffff880037839be0 ffff880037839d20 ffff88003779d3f0 ffff8800379e1e38 ffffffffa003477c ffff88003779d388 Call Trace: [] netif_napi_del+0x1b/0x80 [] free_netdev+0x8b/0x110 [] virtnet_remove+0x7c/0x90 [virtio_net] [] virtio_dev_remove+0x23/0x80 [] __device_release_driver+0x7f/0xf0 [] driver_detach+0xc0/0xd0 [] bus_remove_driver+0x58/0xd0 [] driver_unregister+0x2c/0x50 [] unregister_virtio_driver+0xe/0x10 [] virtio_net_driver_exit+0x10/0x6ce [virtio_net] [] SyS_delete_module+0x172/0x220 [] ? trace_hardirqs_on+0xd/0x10 [] ? __audit_syscall_entry+0x9c/0xf0 [] system_call_fastpath+0x16/0x1b Code: 00 00 55 48 8b 17 48 b9 00 01 10 00 00 00 ad de 48 8b 47 08 48 89 e5 48 39 ca 74 29 48 b9 00 02 20 00 00 00 RIP [] __list_del_entry+0x29/0xd0 RSP ---[ end trace d5931cd3f87c9763 ]--- Fixes: 986a4f4d452d (virtio_net: multiqueue support) Cc: Rusty Russell Cc: "Michael S. Tsirkin" Signed-off-by: Andrey Vagin Acked-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 930039a8d0ea..c29376443428 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1367,6 +1367,11 @@ static void virtnet_config_changed(struct virtio_device *vdev) static void virtnet_free_queues(struct virtnet_info *vi) { + int i; + + for (i = 0; i < vi->max_queue_pairs; i++) + netif_napi_del(&vi->rq[i].napi); + kfree(vi->rq); kfree(vi->sq); } From 859828c0ea476b42f3a93d69d117aaba90994b6f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 5 Dec 2013 16:27:37 +0100 Subject: [PATCH 25/28] br: fix use of ->rx_handler_data in code executed on non-rx_handler path br_stp_rcv() is reached by non-rx_handler path. That means there is no guarantee that dev is bridge port and therefore simple NULL check of ->rx_handler_data is not enough. There is need to check if dev is really bridge port and since only rcu read lock is held here, do it by checking ->rx_handler pointer. Note that synchronize_net() in netdev_rx_handler_unregister() ensures this approach as valid. Introduced originally by: commit f350a0a87374418635689471606454abc7beaa3a "bridge: use rx_handler_data pointer to store net_bridge_port pointer" Fixed but not in the best way by: commit b5ed54e94d324f17c97852296d61a143f01b227a "bridge: fix RCU races with bridge port" Reintroduced by: commit 716ec052d2280d511e10e90ad54a86f5b5d4dcc2 "bridge: fix NULL pointer deref of br_port_get_rcu" Please apply to stable trees as well. Thanks. RH bugzilla reference: https://bugzilla.redhat.com/show_bug.cgi?id=1025770 Reported-by: Laine Stump Debugged-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Signed-off-by: Jiri Pirko Acked-by: Michael S. Tsirkin Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/bridge/br_private.h | 10 ++++++++++ net/bridge/br_stp_bpdu.c | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 229d820bdf0b..045d56eaeca2 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -426,6 +426,16 @@ netdev_features_t br_features_recompute(struct net_bridge *br, int br_handle_frame_finish(struct sk_buff *skb); rx_handler_result_t br_handle_frame(struct sk_buff **pskb); +static inline bool br_rx_handler_check_rcu(const struct net_device *dev) +{ + return rcu_dereference(dev->rx_handler) == br_handle_frame; +} + +static inline struct net_bridge_port *br_port_get_check_rcu(const struct net_device *dev) +{ + return br_rx_handler_check_rcu(dev) ? br_port_get_rcu(dev) : NULL; +} + /* br_ioctl.c */ int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 8660ea3be705..bdb459d21ad8 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -153,7 +153,7 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, if (buf[0] != 0 || buf[1] != 0 || buf[2] != 0) goto err; - p = br_port_get_rcu(dev); + p = br_port_get_check_rcu(dev); if (!p) goto err; From a328f3a0599e30d0d30333ec9f30cb1fa44c561a Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Thu, 5 Dec 2013 13:35:37 -0300 Subject: [PATCH 26/28] net: mvneta: Fix incorrect DMA unmapping size The current code unmaps the DMA mapping created for rx skb_buff's by using the data_size as the the mapping size. This is wrong since the correct size to specify should match the size used to create the mapping. This commit removes the following DMA_API_DEBUG warning: ------------[ cut here ]------------ WARNING: at lib/dma-debug.c:887 check_unmap+0x3a8/0x860() mvneta d0070000.ethernet: DMA-API: device driver frees DMA memory with different size [device address=0x000000002eb80000] [map size=1600 bytes] [unmap size=66 bytes] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.10.21-01444-ga88ae13-dirty #92 [] (unwind_backtrace+0x0/0xf8) from [] (show_stack+0x10/0x14) [] (show_stack+0x10/0x14) from [] (warn_slowpath_common+0x48/0x68) [] (warn_slowpath_common+0x48/0x68) from [] (warn_slowpath_fmt+0x30/0x40) [] (warn_slowpath_fmt+0x30/0x40) from [] (check_unmap+0x3a8/0x860) [] (check_unmap+0x3a8/0x860) from [] (debug_dma_unmap_page+0x64/0x70) [] (debug_dma_unmap_page+0x64/0x70) from [] (mvneta_rx+0xec/0x468) [] (mvneta_rx+0xec/0x468) from [] (mvneta_poll+0x78/0x16c) [] (mvneta_poll+0x78/0x16c) from [] (net_rx_action+0x94/0x160) [] (net_rx_action+0x94/0x160) from [] (__do_softirq+0xe8/0x1d0) [] (__do_softirq+0xe8/0x1d0) from [] (do_softirq+0x4c/0x58) [] (do_softirq+0x4c/0x58) from [] (irq_exit+0x58/0x90) [] (irq_exit+0x58/0x90) from [] (handle_IRQ+0x3c/0x94) [] (handle_IRQ+0x3c/0x94) from [] (armada_370_xp_handle_irq+0x4c/0xb4) [] (armada_370_xp_handle_irq+0x4c/0xb4) from [] (__irq_svc+0x40/0x50) Exception stack(0xc04f1f70 to 0xc04f1fb8) 1f60: c1fe46f8 00000000 00001d92 00001d92 1f80: c04f0000 c04f0000 c04f84a4 c03e081c c05220e7 00000001 c05220e7 c04f0000 1fa0: 00000000 c04f1fb8 c000eaf8 c004c048 60000113 ffffffff [] (__irq_svc+0x40/0x50) from [] (cpu_startup_entry+0x54/0x128) [] (cpu_startup_entry+0x54/0x128) from [] (start_kernel+0x29c/0x2f0) [] (start_kernel+0x29c/0x2f0) from [<00008074>] (0x8074) ---[ end trace d4955f6acd178110 ]--- Mapped at: [] debug_dma_map_page+0x4c/0x11c [] mvneta_setup_rxqs+0x398/0x598 [] mvneta_open+0x40/0x17c [] __dev_open+0x9c/0x100 [] __dev_change_flags+0x7c/0x134 Signed-off-by: Ezequiel Garcia Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index b8e232b4ea2d..d5f0d72e5e33 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1378,7 +1378,7 @@ static void mvneta_rxq_drop_pkts(struct mvneta_port *pp, dev_kfree_skb_any(skb); dma_unmap_single(pp->dev->dev.parent, rx_desc->buf_phys_addr, - rx_desc->data_size, DMA_FROM_DEVICE); + MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE); } if (rx_done) @@ -1424,7 +1424,7 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo, } dma_unmap_single(pp->dev->dev.parent, rx_desc->buf_phys_addr, - rx_desc->data_size, DMA_FROM_DEVICE); + MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE); rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE); From 98bfd23cdb30e68e90571d7a2607e9479f8a50ec Mon Sep 17 00:00:00 2001 From: Michael Dalton Date: Thu, 5 Dec 2013 13:14:05 -0800 Subject: [PATCH 27/28] virtio-net: free bufs correctly on invalid packet length When a packet with invalid length arrives, ensure that the packet is freed correctly if mergeable packet buffers and big packets (GUEST_TSO4) are both enabled. Signed-off-by: Michael Dalton Acked-by: Jason Wang Acked-by: Andrew Vagin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index c29376443428..d208f8604981 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -426,10 +426,10 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) { pr_debug("%s: short packet %i\n", dev->name, len); dev->stats.rx_length_errors++; - if (vi->big_packets) - give_pages(rq, buf); - else if (vi->mergeable_rx_bufs) + if (vi->mergeable_rx_bufs) put_page(virt_to_head_page(buf)); + else if (vi->big_packets) + give_pages(rq, buf); else dev_kfree_skb(buf); return; From 66e56cd46b93ef407c60adcac62cf33b06119d50 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 6 Dec 2013 11:36:15 +0100 Subject: [PATCH 28/28] packet: fix send path when running with proto == 0 Commit e40526cb20b5 introduced a cached dev pointer, that gets hooked into register_prot_hook(), __unregister_prot_hook() to update the device used for the send path. We need to fix this up, as otherwise this will not work with sockets created with protocol = 0, plus with sll_protocol = 0 passed via sockaddr_ll when doing the bind. So instead, assign the pointer directly. The compiler can inline these helper functions automagically. While at it, also assume the cached dev fast-path as likely(), and document this variant of socket creation as it seems it is not widely used (seems not even the author of TX_RING was aware of that in his reference example [1]). Tested with reproducer from e40526cb20b5. [1] http://wiki.ipxwarzone.com/index.php5?title=Linux_packet_mmap#Example Fixes: e40526cb20b5 ("packet: fix use after free race in send path when dev is released") Signed-off-by: Daniel Borkmann Tested-by: Salam Noureddine Tested-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- Documentation/networking/packet_mmap.txt | 10 ++++ net/packet/af_packet.c | 65 +++++++++++++++--------- 2 files changed, 50 insertions(+), 25 deletions(-) diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt index c01223628a87..8e48e3b14227 100644 --- a/Documentation/networking/packet_mmap.txt +++ b/Documentation/networking/packet_mmap.txt @@ -123,6 +123,16 @@ Transmission process is similar to capture as shown below. [shutdown] close() --------> destruction of the transmission socket and deallocation of all associated resources. +Socket creation and destruction is also straight forward, and is done +the same way as in capturing described in the previous paragraph: + + int fd = socket(PF_PACKET, mode, 0); + +The protocol can optionally be 0 in case we only want to transmit +via this socket, which avoids an expensive call to packet_rcv(). +In this case, you also need to bind(2) the TX_RING with sll_protocol = 0 +set. Otherwise, htons(ETH_P_ALL) or any other protocol, for example. + Binding the socket to your network interface is mandatory (with zero copy) to know the header size of frames used in the circular buffer. diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ba2548bd85bf..88cfbc189558 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -237,6 +237,30 @@ struct packet_skb_cb { static void __fanout_unlink(struct sock *sk, struct packet_sock *po); static void __fanout_link(struct sock *sk, struct packet_sock *po); +static struct net_device *packet_cached_dev_get(struct packet_sock *po) +{ + struct net_device *dev; + + rcu_read_lock(); + dev = rcu_dereference(po->cached_dev); + if (likely(dev)) + dev_hold(dev); + rcu_read_unlock(); + + return dev; +} + +static void packet_cached_dev_assign(struct packet_sock *po, + struct net_device *dev) +{ + rcu_assign_pointer(po->cached_dev, dev); +} + +static void packet_cached_dev_reset(struct packet_sock *po) +{ + RCU_INIT_POINTER(po->cached_dev, NULL); +} + /* register_prot_hook must be invoked with the po->bind_lock held, * or from a context in which asynchronous accesses to the packet * socket is not possible (packet_create()). @@ -246,12 +270,10 @@ static void register_prot_hook(struct sock *sk) struct packet_sock *po = pkt_sk(sk); if (!po->running) { - if (po->fanout) { + if (po->fanout) __fanout_link(sk, po); - } else { + else dev_add_pack(&po->prot_hook); - rcu_assign_pointer(po->cached_dev, po->prot_hook.dev); - } sock_hold(sk); po->running = 1; @@ -270,12 +292,11 @@ static void __unregister_prot_hook(struct sock *sk, bool sync) struct packet_sock *po = pkt_sk(sk); po->running = 0; - if (po->fanout) { + + if (po->fanout) __fanout_unlink(sk, po); - } else { + else __dev_remove_pack(&po->prot_hook); - RCU_INIT_POINTER(po->cached_dev, NULL); - } __sock_put(sk); @@ -2059,19 +2080,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, return tp_len; } -static struct net_device *packet_cached_dev_get(struct packet_sock *po) -{ - struct net_device *dev; - - rcu_read_lock(); - dev = rcu_dereference(po->cached_dev); - if (dev) - dev_hold(dev); - rcu_read_unlock(); - - return dev; -} - static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { struct sk_buff *skb; @@ -2088,7 +2096,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) mutex_lock(&po->pg_vec_lock); - if (saddr == NULL) { + if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; @@ -2242,7 +2250,7 @@ static int packet_snd(struct socket *sock, * Get and verify the address. */ - if (saddr == NULL) { + if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; @@ -2451,6 +2459,8 @@ static int packet_release(struct socket *sock) spin_lock(&po->bind_lock); unregister_prot_hook(sk, false); + packet_cached_dev_reset(po); + if (po->prot_hook.dev) { dev_put(po->prot_hook.dev); po->prot_hook.dev = NULL; @@ -2506,14 +2516,17 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protoc spin_lock(&po->bind_lock); unregister_prot_hook(sk, true); + po->num = protocol; po->prot_hook.type = protocol; if (po->prot_hook.dev) dev_put(po->prot_hook.dev); - po->prot_hook.dev = dev; + po->prot_hook.dev = dev; po->ifindex = dev ? dev->ifindex : 0; + packet_cached_dev_assign(po, dev); + if (protocol == 0) goto out_unlock; @@ -2626,7 +2639,8 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po = pkt_sk(sk); sk->sk_family = PF_PACKET; po->num = proto; - RCU_INIT_POINTER(po->cached_dev, NULL); + + packet_cached_dev_reset(po); sk->sk_destruct = packet_sock_destruct; sk_refcnt_debug_inc(sk); @@ -3337,6 +3351,7 @@ static int packet_notifier(struct notifier_block *this, sk->sk_error_report(sk); } if (msg == NETDEV_UNREGISTER) { + packet_cached_dev_reset(po); po->ifindex = -1; if (po->prot_hook.dev) dev_put(po->prot_hook.dev);