From 5596732fa8c14139018ecda8356eabbfb599d830 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 7 Apr 2014 08:08:52 +0200 Subject: [PATCH 001/178] xfrm: Fix crash with ipv6 IPsec tunnel and NAT. The ipv6 xfrm output path is not aware that packets can be rerouted by NAT to not use IPsec. We crash in this case because we expect to have a xfrm state at the dst_entry. This crash happens if the ipv6 layer does IPsec and NAT or if we have an interfamily IPsec tunnel with ipv4 NAT. We fix this by checking for a NAT rerouted packet in each address family and dst_output() to the new destination in this case. Reported-by: Martin Pelikan Tested-by: Martin Pelikan Signed-off-by: Steffen Klassert --- net/ipv4/xfrm4_output.c | 34 +++++++++++++++++++--------------- net/ipv6/xfrm6_output.c | 22 +++++++++++++--------- 2 files changed, 32 insertions(+), 24 deletions(-) diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index baa0f63731fd..f3800bf79d86 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -62,10 +62,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) if (err) return err; - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED; - - skb->protocol = htons(ETH_P_IP); + IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE; return x->outer_mode->output2(x, skb); } @@ -73,27 +70,34 @@ EXPORT_SYMBOL(xfrm4_prepare_output); int xfrm4_output_finish(struct sk_buff *skb) { -#ifdef CONFIG_NETFILTER - if (!skb_dst(skb)->xfrm) { - IPCB(skb)->flags |= IPSKB_REROUTED; - return dst_output(skb); - } + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + skb->protocol = htons(ETH_P_IP); +#ifdef CONFIG_NETFILTER IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; #endif - skb->protocol = htons(ETH_P_IP); return xfrm_output(skb); } +static int __xfrm4_output(struct sk_buff *skb) +{ + struct xfrm_state *x = skb_dst(skb)->xfrm; + +#ifdef CONFIG_NETFILTER + if (!x) { + IPCB(skb)->flags |= IPSKB_REROUTED; + return dst_output(skb); + } +#endif + + return x->outer_mode->afinfo->output_finish(skb); +} + int xfrm4_output(struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); - struct xfrm_state *x = dst->xfrm; - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, - NULL, dst->dev, - x->outer_mode->afinfo->output_finish, + NULL, skb_dst(skb)->dev, __xfrm4_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 6cd625e37706..ba624331451c 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -114,12 +114,6 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) if (err) return err; - memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); -#ifdef CONFIG_NETFILTER - IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; -#endif - - skb->protocol = htons(ETH_P_IPV6); skb->local_df = 1; return x->outer_mode->output2(x, skb); @@ -128,11 +122,13 @@ EXPORT_SYMBOL(xfrm6_prepare_output); int xfrm6_output_finish(struct sk_buff *skb) { + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + skb->protocol = htons(ETH_P_IPV6); + #ifdef CONFIG_NETFILTER IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; #endif - skb->protocol = htons(ETH_P_IPV6); return xfrm_output(skb); } @@ -142,6 +138,13 @@ static int __xfrm6_output(struct sk_buff *skb) struct xfrm_state *x = dst->xfrm; int mtu; +#ifdef CONFIG_NETFILTER + if (!x) { + IP6CB(skb)->flags |= IP6SKB_REROUTED; + return dst_output(skb); + } +#endif + if (skb->protocol == htons(ETH_P_IPV6)) mtu = ip6_skb_dst_mtu(skb); else @@ -165,6 +168,7 @@ static int __xfrm6_output(struct sk_buff *skb) int xfrm6_output(struct sk_buff *skb) { - return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, - skb_dst(skb)->dev, __xfrm6_output); + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, + NULL, skb_dst(skb)->dev, __xfrm6_output, + !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } From ef67f18dece707eb2fb1ef25ccca47062842dc4a Mon Sep 17 00:00:00 2001 From: Alexander Bondar Date: Sun, 30 Mar 2014 10:47:08 +0300 Subject: [PATCH 002/178] iwlwifi: mvm: several fixes in scan The firmware doesn't handle properly the fragmented scan. Stop using it. While at it change max_out_time and suspend_time units from usec to TUs as expected by firmware API. Signed-off-by: Alexander Bondar Signed-off-by: Emmanuel Grumbach --- .../net/wireless/iwlwifi/mvm/fw-api-scan.h | 8 +-- drivers/net/wireless/iwlwifi/mvm/scan.c | 53 +++++-------------- 2 files changed, 16 insertions(+), 45 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h b/drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h index 9426905de6b2..d73a89ecd78a 100644 --- a/drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h +++ b/drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h @@ -183,9 +183,9 @@ enum iwl_scan_type { * this number of packets were received (typically 1) * @passive2active: is auto switching from passive to active during scan allowed * @rxchain_sel_flags: RXON_RX_CHAIN_* - * @max_out_time: in usecs, max out of serving channel time + * @max_out_time: in TUs, max out of serving channel time * @suspend_time: how long to pause scan when returning to service channel: - * bits 0-19: beacon interal in usecs (suspend before executing) + * bits 0-19: beacon interal in TUs (suspend before executing) * bits 20-23: reserved * bits 24-31: number of beacons (suspend between channels) * @rxon_flags: RXON_FLG_* @@ -383,8 +383,8 @@ enum scan_framework_client { * @quiet_plcp_th: quiet channel num of packets threshold * @good_CRC_th: passive to active promotion threshold * @rx_chain: RXON rx chain. - * @max_out_time: max uSec to be out of assoceated channel - * @suspend_time: pause scan this long when returning to service channel + * @max_out_time: max TUs to be out of assoceated channel + * @suspend_time: pause scan this TUs when returning to service channel * @flags: RXON flags * @filter_flags: RXONfilter * @tx_cmd: tx command for active scan; for 2GHz and for 5GHz. diff --git a/drivers/net/wireless/iwlwifi/mvm/scan.c b/drivers/net/wireless/iwlwifi/mvm/scan.c index c91dc8498852..cba88a379fc8 100644 --- a/drivers/net/wireless/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/iwlwifi/mvm/scan.c @@ -277,51 +277,22 @@ static void iwl_mvm_scan_calc_params(struct iwl_mvm *mvm, IEEE80211_IFACE_ITER_NORMAL, iwl_mvm_scan_condition_iterator, &global_bound); - /* - * Under low latency traffic passive scan is fragmented meaning - * that dwell on a particular channel will be fragmented. Each fragment - * dwell time is 20ms and fragments period is 105ms. Skipping to next - * channel will be delayed by the same period - 105ms. So suspend_time - * parameter describing both fragments and channels skipping periods is - * set to 105ms. This value is chosen so that overall passive scan - * duration will not be too long. Max_out_time in this case is set to - * 70ms, so for active scanning operating channel will be left for 70ms - * while for passive still for 20ms (fragment dwell). - */ - if (global_bound) { - if (!iwl_mvm_low_latency(mvm)) { - params->suspend_time = ieee80211_tu_to_usec(100); - params->max_out_time = ieee80211_tu_to_usec(600); - } else { - params->suspend_time = ieee80211_tu_to_usec(105); - /* P2P doesn't support fragmented passive scan, so - * configure max_out_time to be at least longest dwell - * time for passive scan. - */ - if (vif->type == NL80211_IFTYPE_STATION && !vif->p2p) { - params->max_out_time = ieee80211_tu_to_usec(70); - params->passive_fragmented = true; - } else { - u32 passive_dwell; - /* - * Use band G so that passive channel dwell time - * will be assigned with maximum value. - */ - band = IEEE80211_BAND_2GHZ; - passive_dwell = iwl_mvm_get_passive_dwell(band); - params->max_out_time = - ieee80211_tu_to_usec(passive_dwell); - } - } + if (!global_bound) + goto not_bound; + + params->suspend_time = 100; + params->max_out_time = 600; + + if (iwl_mvm_low_latency(mvm)) { + params->suspend_time = 250; + params->max_out_time = 250; } +not_bound: + for (band = IEEE80211_BAND_2GHZ; band < IEEE80211_NUM_BANDS; band++) { - if (params->passive_fragmented) - params->dwell[band].passive = 20; - else - params->dwell[band].passive = - iwl_mvm_get_passive_dwell(band); + params->dwell[band].passive = iwl_mvm_get_passive_dwell(band); params->dwell[band].active = iwl_mvm_get_active_dwell(band, n_ssids); } From a32452366b7250c42e96a18ffc3ad8db9e0ca3c2 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 16 Apr 2014 09:01:03 +0200 Subject: [PATCH 003/178] vti4: Don't count header length twice. We currently count the size of LL_MAX_HEADER and struct iphdr twice for vti4 devices, this leads to a wrong device mtu. The size of LL_MAX_HEADER and struct iphdr is already counted in ip_tunnel_bind_dev(), so don't do it again in vti_tunnel_init(). Fixes: b9959fd3 ("vti: switch to new ip tunnel code") Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 687ddef4e574..cd62596e9a87 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -349,7 +349,6 @@ static int vti_tunnel_init(struct net_device *dev) memcpy(dev->broadcast, &iph->daddr, 4); dev->type = ARPHRD_TUNNEL; - dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); dev->mtu = ETH_DATA_LEN; dev->flags = IFF_NOARP; dev->iflink = 0; From 062f1d6de01df545ca0df366c6133f0fa164bff6 Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Tue, 22 Apr 2014 18:19:25 +0800 Subject: [PATCH 004/178] mac80211: avoid handling of SMPS for mesh The patch "mac80211: implement SMPS for AP" has caused kernel oops at mesh STA if the peer mesh STA operates in sleep mode and then becomes active mode. It can be easily reproduced by setting the following commands at peer mesh STA: iw mesh0 station set aa:bb:cc:dd:ee:ff mesh_power_mode deep iw mesh0 station set aa:bb:cc:dd:ee:ff mesh_power_mode active Kernel oops will happen at mesh STA aa:bb:cc:dd:ee:ff. Fix this by avoiding SMPS for mesh mode. Signed-off-by: Chun-Yeow Yeoh Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 137a192e64bc..847d92f6bef6 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1148,7 +1148,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) atomic_dec(&ps->num_sta_ps); /* This station just woke up and isn't aware of our SMPS state */ - if (!ieee80211_smps_is_restrictive(sta->known_smps_mode, + if (!ieee80211_vif_is_mesh(&sdata->vif) && + !ieee80211_smps_is_restrictive(sta->known_smps_mode, sdata->smps_mode) && sta->known_smps_mode != sdata->bss->req_smps && sta_info_tx_streams(sta) != 1) { From 311ab8e1c28659e16a6fe14a68929c3ced4a5757 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Wed, 16 Apr 2014 08:32:41 -0400 Subject: [PATCH 005/178] mac80211: fixup radiotap tx flags for RTS/CTS When using RTS/CTS, the CTS-to-Self bit in radiotap TX flags is getting set instead of the RTS bit. Set the correct one. Reported-by: Larry Maxwell Signed-off-by: Bob Copeland Signed-off-by: Johannes Berg --- net/mac80211/status.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 00ba90b02ab2..60cb7a665976 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -314,10 +314,9 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, !is_multicast_ether_addr(hdr->addr1)) txflags |= IEEE80211_RADIOTAP_F_TX_FAIL; - if ((info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) || - (info->status.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)) + if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT) txflags |= IEEE80211_RADIOTAP_F_TX_CTS; - else if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) + if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) txflags |= IEEE80211_RADIOTAP_F_TX_RTS; put_unaligned_le16(txflags, pos); From db66d756c74acb886c51f11b501c2fe622018a0a Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Fri, 18 Apr 2014 01:59:15 +0900 Subject: [PATCH 006/178] sched/docbook: Fix 'make htmldocs' warnings caused by missing description When 'flags' argument to sched_{set,get}attr() syscalls were added in: 6d35ab48090b ("sched: Add 'flags' argument to sched_{set,get}attr() syscalls") no description for 'flags' was added. It causes the following warnings on "make htmldocs": Warning(/kernel/sched/core.c:3645): No description found for parameter 'flags' Warning(/kernel/sched/core.c:3789): No description found for parameter 'flags' Signed-off-by: Masanari Iida Cc: peterz@infradead.org Link: http://lkml.kernel.org/r/1397753955-2914-1-git-send-email-standby24x7@gmail.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 268a45ea238c..9fe2190005cb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3639,6 +3639,7 @@ SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) * sys_sched_setattr - same as above, but with extended sched_attr * @pid: the pid in question. * @uattr: structure containing the extended parameters. + * @flags: for future extension. */ SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, unsigned int, flags) @@ -3783,6 +3784,7 @@ err_size: * @pid: the pid in question. * @uattr: structure containing the extended parameters. * @size: sizeof(attr) for fwd/bwd comp. + * @flags: for future extension. */ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, unsigned int, size, unsigned int, flags) From 61622cc6f29034d0479f7ac16f3d48f1eeabf3a1 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 29 Apr 2014 07:50:44 +0200 Subject: [PATCH 007/178] xfrm4: Properly handle unsupported protocols We don't catch the case if an unsupported protocol is submitted to the xfrm4 protocol handlers, this can lead to NULL pointer dereferences. Fix this by adding the appropriate checks. Fixes: 3328715e ("xfrm4: Add IPsec protocol multiplexer") Signed-off-by: Steffen Klassert --- net/ipv4/xfrm4_protocol.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c index 7f7b243e8139..a2ce0101eaac 100644 --- a/net/ipv4/xfrm4_protocol.c +++ b/net/ipv4/xfrm4_protocol.c @@ -50,8 +50,12 @@ int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err) { int ret; struct xfrm4_protocol *handler; + struct xfrm4_protocol __rcu **head = proto_handlers(protocol); - for_each_protocol_rcu(*proto_handlers(protocol), handler) + if (!head) + return 0; + + for_each_protocol_rcu(*head, handler) if ((ret = handler->cb_handler(skb, err)) <= 0) return ret; @@ -64,15 +68,20 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, { int ret; struct xfrm4_protocol *handler; + struct xfrm4_protocol __rcu **head = proto_handlers(nexthdr); XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; XFRM_SPI_SKB_CB(skb)->family = AF_INET; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); - for_each_protocol_rcu(*proto_handlers(nexthdr), handler) + if (!head) + goto out; + + for_each_protocol_rcu(*head, handler) if ((ret = handler->input_handler(skb, nexthdr, spi, encap_type)) != -EINVAL) return ret; +out: icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); kfree_skb(skb); @@ -208,6 +217,9 @@ int xfrm4_protocol_register(struct xfrm4_protocol *handler, int ret = -EEXIST; int priority = handler->priority; + if (!proto_handlers(protocol) || !netproto(protocol)) + return -EINVAL; + mutex_lock(&xfrm4_protocol_mutex); if (!rcu_dereference_protected(*proto_handlers(protocol), @@ -250,6 +262,9 @@ int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, struct xfrm4_protocol *t; int ret = -ENOENT; + if (!proto_handlers(protocol) || !netproto(protocol)) + return -EINVAL; + mutex_lock(&xfrm4_protocol_mutex); for (pprev = proto_handlers(protocol); From fe337ac2839521b360f828b3ebd992d597b1ad16 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 28 Apr 2014 21:07:31 +0200 Subject: [PATCH 008/178] netfilter: ctnetlink: don't add null bindings if no nat requested commit 0eba801b64cc8284d9024c7ece30415a2b981a72 tried to fix a race where nat initialisation can happen after ctnetlink-created conntrack has been created. However, it causes the nat module(s) to be loaded needlessly on systems that are not using NAT. Fortunately, we do not have to create null bindings in that case. conntracks injected via ctnetlink always have the CONFIRMED bit set, which prevents addition of the nat extension in nf_nat_ipv4/6_fn(). We only need to make sure that either no nat extension is added or that we've created both src and dst manips. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index ccc46fa5edbc..58579634427d 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1336,6 +1336,9 @@ ctnetlink_setup_nat(struct nf_conn *ct, const struct nlattr * const cda[]) #ifdef CONFIG_NF_NAT_NEEDED int ret; + if (!cda[CTA_NAT_DST] && !cda[CTA_NAT_SRC]) + return 0; + ret = ctnetlink_parse_nat_setup(ct, NF_NAT_MANIP_DST, cda[CTA_NAT_DST]); if (ret < 0) From 895162b1101b3ea5db08ca6822ae9672717efec0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 2 May 2014 15:32:16 +0200 Subject: [PATCH 009/178] netfilter: ipv4: defrag: set local_df flag on defragmented skb else we may fail to forward skb even if original fragments do fit outgoing link mtu: 1. remote sends 2k packets in two 1000 byte frags, DF set 2. we want to forward but only see '2k > mtu and DF set' 3. we then send icmp error saying that outgoing link is 1500 But original sender never sent a packet that would not fit the outgoing link. Setting local_df makes outgoing path test size vs. IPCB(skb)->frag_max_size, so we will still send the correct error in case the largest original size did not fit outgoing link mtu. Reported-by: Maxime Bizon Suggested-by: Maxime Bizon Fixes: 5f2d04f1f9 (ipv4: fix path MTU discovery with connection tracking) Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_defrag_ipv4.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 12e13bd82b5b..f40f321b41fc 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -22,7 +22,6 @@ #endif #include -/* Returns new sk_buff, or NULL */ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) { int err; @@ -33,8 +32,10 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) err = ip_defrag(skb, user); local_bh_enable(); - if (!err) + if (!err) { ip_send_check(ip_hdr(skb)); + skb->local_df = 1; + } return err; } From ecd15dd7e45f3683fa8142b9f2c015dfaa0c243d Mon Sep 17 00:00:00 2001 From: Denys Fedoryshchenko Date: Sun, 4 May 2014 13:35:37 +0200 Subject: [PATCH 010/178] netfilter: nfnetlink: Fix use after free when it fails to process batch This bug manifests when calling the nft command line tool without nf_tables kernel support. kernel message: [ 44.071555] Netfilter messages via NETLINK v0.30. [ 44.072253] BUG: unable to handle kernel NULL pointer dereference at 0000000000000119 [ 44.072264] IP: [] netlink_getsockbyportid+0xf/0x70 [ 44.072272] PGD 7f2b74067 PUD 7f2b73067 PMD 0 [ 44.072277] Oops: 0000 [#1] SMP [...] [ 44.072369] Call Trace: [ 44.072373] [] netlink_unicast+0x91/0x200 [ 44.072377] [] netlink_ack+0x99/0x110 [ 44.072381] [] nfnetlink_rcv+0x3c1/0x408 [nfnetlink] [ 44.072385] [] netlink_unicast+0xf3/0x200 [ 44.072389] [] netlink_sendmsg+0x2ff/0x740 [ 44.072394] [] ? __mmdrop+0x62/0x90 [ 44.072398] [] sock_sendmsg+0x8b/0xc0 [ 44.072403] [] ? copy_user_enhanced_fast_string+0x5/0x10 [ 44.072406] [] ? move_addr_to_kernel+0x2c/0x50 [ 44.072410] [] ___sys_sendmsg+0x3c3/0x3d0 [ 44.072415] [] ? handle_mm_fault+0xa9a/0xc60 [ 44.072420] [] ? mmap_region+0x166/0x5a0 [ 44.072424] [] ? __do_page_fault+0x1dc/0x510 [ 44.072428] [] ? apparmor_capable+0x1c/0x60 [ 44.072435] [] ? _raw_spin_unlock_bh+0x1a/0x20 [ 44.072439] [] ? release_sock+0x106/0x150 [ 44.072443] [] __sys_sendmsg+0x42/0x80 [ 44.072446] [] SyS_sendmsg+0x12/0x20 [ 44.072450] [] system_call_fastpath+0x1a/0x1f Signed-off-by: Denys Fedoryshchenko Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index e009087620e3..23ef77c60fff 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -256,15 +256,15 @@ replay: #endif { nfnl_unlock(subsys_id); - kfree_skb(nskb); - return netlink_ack(skb, nlh, -EOPNOTSUPP); + netlink_ack(skb, nlh, -EOPNOTSUPP); + return kfree_skb(nskb); } } if (!ss->commit || !ss->abort) { nfnl_unlock(subsys_id); - kfree_skb(nskb); - return netlink_ack(skb, nlh, -EOPNOTSUPP); + netlink_ack(skb, nlh, -EOPNOTSUPP); + return kfree_skb(skb); } while (skb->len >= nlmsg_total_size(0)) { From f4ebddf9abb1fb0add1ce8e9c0bc414f1c17d81d Mon Sep 17 00:00:00 2001 From: Henning Rogge Date: Thu, 1 May 2014 10:03:46 +0200 Subject: [PATCH 011/178] mac80211: Fix mac80211 station info rx bitrate for IBSS mode Filter out incoming multicast packages before applying their bitrate to the rx bitrate station info field to prevent them from setting the rx bitrate to the basic multicast rate. Signed-off-by: Henning Rogge Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 216c45b949e5..2b608b2b70ec 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1231,7 +1231,8 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) if (ether_addr_equal(bssid, rx->sdata->u.ibss.bssid) && test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { sta->last_rx = jiffies; - if (ieee80211_is_data(hdr->frame_control)) { + if (ieee80211_is_data(hdr->frame_control) && + !is_multicast_ether_addr(hdr->addr1)) { sta->last_rx_rate_idx = status->rate_idx; sta->last_rx_rate_flag = status->flag; sta->last_rx_rate_vht_flag = status->vht_flag; From c1fbb258846dfc425507a093922d2d001e54c3ea Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Wed, 30 Apr 2014 15:58:13 +0300 Subject: [PATCH 012/178] cfg80211: free sme on connection failures cfg80211 is notified about connection failures by __cfg80211_connect_result() call. However, this function currently does not free cfg80211 sme. This results in hanging connection attempts in some cases e.g. when mac80211 authentication attempt is denied, we have this function call: ieee80211_rx_mgmt_auth() -> cfg80211_rx_mlme_mgmt() -> cfg80211_process_auth() -> cfg80211_sme_rx_auth() -> __cfg80211_connect_result() but cfg80211_sme_free() is never get called. Fixes: ceca7b712 ("cfg80211: separate internal SME implementation") Cc: stable@vger.kernel.org (3.10+) Signed-off-by: Eliad Peller Signed-off-by: Johannes Berg --- net/wireless/sme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index acdcb4a81817..3546a77033de 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -234,7 +234,6 @@ void cfg80211_conn_work(struct work_struct *work) NULL, 0, NULL, 0, WLAN_STATUS_UNSPECIFIED_FAILURE, false, NULL); - cfg80211_sme_free(wdev); } wdev_unlock(wdev); } @@ -648,6 +647,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, cfg80211_unhold_bss(bss_from_pub(bss)); cfg80211_put_bss(wdev->wiphy, bss); } + cfg80211_sme_free(wdev); return; } From 792e6aa7a15ea0fb16f8687e93caede1ea9118c7 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Wed, 30 Apr 2014 16:14:23 +0300 Subject: [PATCH 013/178] cfg80211: add cfg80211_sched_scan_stopped_rtnl Add locked-version for cfg80211_sched_scan_stopped. This is used for some users that might want to call it when rtnl is already locked. Fixes: d43c6b6 ("mac80211: reschedule sched scan after HW restart") Cc: stable@vger.kernel.org (3.14+) Signed-off-by: Eliad Peller Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 12 ++++++++++++ net/wireless/scan.c | 12 ++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f3539a15c411..f856e5a746fa 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3668,6 +3668,18 @@ void cfg80211_sched_scan_results(struct wiphy *wiphy); */ void cfg80211_sched_scan_stopped(struct wiphy *wiphy); +/** + * cfg80211_sched_scan_stopped_rtnl - notify that the scheduled scan has stopped + * + * @wiphy: the wiphy on which the scheduled scan stopped + * + * The driver can call this function to inform cfg80211 that the + * scheduled scan had to be stopped, for whatever reason. The driver + * is then called back via the sched_scan_stop operation when done. + * This function should be called with rtnl locked. + */ +void cfg80211_sched_scan_stopped_rtnl(struct wiphy *wiphy); + /** * cfg80211_inform_bss_width_frame - inform cfg80211 of a received BSS frame * diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 7d09a712cb1f..88f108edfb58 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -284,14 +284,22 @@ void cfg80211_sched_scan_results(struct wiphy *wiphy) } EXPORT_SYMBOL(cfg80211_sched_scan_results); -void cfg80211_sched_scan_stopped(struct wiphy *wiphy) +void cfg80211_sched_scan_stopped_rtnl(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + ASSERT_RTNL(); + trace_cfg80211_sched_scan_stopped(wiphy); - rtnl_lock(); __cfg80211_stop_sched_scan(rdev, true); +} +EXPORT_SYMBOL(cfg80211_sched_scan_stopped_rtnl); + +void cfg80211_sched_scan_stopped(struct wiphy *wiphy) +{ + rtnl_lock(); + cfg80211_sched_scan_stopped_rtnl(wiphy); rtnl_unlock(); } EXPORT_SYMBOL(cfg80211_sched_scan_stopped); From e669ba2d06c6195662601956454ac959892f0762 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Wed, 30 Apr 2014 16:14:24 +0300 Subject: [PATCH 014/178] mac80211: fix nested rtnl locking on ieee80211_reconfig ieee80211_reconfig already holds rtnl, so calling cfg80211_sched_scan_stopped results in deadlock. Use the rtnl-version of this function instead. Fixes: d43c6b6 ("mac80211: reschedule sched scan after HW restart") Cc: stable@vger.kernel.org (3.14+) Signed-off-by: Eliad Peller Signed-off-by: Johannes Berg --- net/mac80211/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 275c94f995f7..3c365837e910 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1780,7 +1780,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) mutex_unlock(&local->mtx); if (sched_scan_stopped) - cfg80211_sched_scan_stopped(local->hw.wiphy); + cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy); /* * If this is for hw restart things are still running. From 7c3d5ab1f35f5475b1a1fbe74143683cfc092d33 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sat, 3 May 2014 03:14:04 +0400 Subject: [PATCH 015/178] ipv4: fix "conntrack zones" support for defrag user check in ip_expire Defrag user check in ip_expire was not updated after adding support for "conntrack zones". This bug manifests as a RFC violation, since the router will send the icmp time exceeeded message when using conntrack zones. Signed-off-by: Vasily Averin Signed-off-by: Pablo Neira Ayuso --- net/ipv4/ip_fragment.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index c10a3ce5cbff..ed32313e307c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -232,8 +232,9 @@ static void ip_expire(unsigned long arg) * "Fragment Reassembly Timeout" message, per RFC792. */ if (qp->user == IP_DEFRAG_AF_PACKET || - (qp->user == IP_DEFRAG_CONNTRACK_IN && - skb_rtable(head)->rt_type != RTN_LOCAL)) + ((qp->user >= IP_DEFRAG_CONNTRACK_IN) && + (qp->user <= __IP_DEFRAG_CONNTRACK_IN_END) && + (skb_rtable(head)->rt_type != RTN_LOCAL))) goto out_rcu_unlock; From aff09ce303f83bd370772349238482ae422a2341 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 5 May 2014 00:17:48 +0400 Subject: [PATCH 016/178] bridge: superfluous skb->nfct check in br_nf_dev_queue_xmit Currently bridge can silently drop ipv4 fragments. If node have loaded nf_defrag_ipv4 module but have no nf_conntrack_ipv4, br_nf_pre_routing defragments incoming ipv4 fragments but nfct check in br_nf_dev_queue_xmit does not allow re-fragment combined packet back, and therefore it is dropped in br_dev_queue_push_xmit without incrementing of any failcounters It seems the only way to hit the ip_fragment code in the bridge xmit path is to have a fragment list whose reassembled fragments go over the mtu. This only happens if nf_defrag is enabled. Thanks to Florian Westphal for providing feedback to clarify this. Defragmentation ipv4 is required not only in conntracks but at least in TPROXY target and socket match, therefore #ifdef is changed from NF_CONNTRACK_IPV4 to NF_DEFRAG_IPV4 Signed-off-by: Vasily Averin Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 80e1b0f60a30..2acf7fa1fec6 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -859,12 +859,12 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, return NF_STOLEN; } -#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV4) +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) static int br_nf_dev_queue_xmit(struct sk_buff *skb) { int ret; - if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) && + if (skb->protocol == htons(ETH_P_IP) && skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu && !skb_is_gso(skb)) { if (br_parse_ip_options(skb)) From edb666f07e539d92f63284213d72083ed8ac05ea Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 29 Apr 2014 08:23:03 +0200 Subject: [PATCH 017/178] xfrm6: Properly handle unsupported protocols We don't catch the case if an unsupported protocol is submitted to the xfrm6 protocol handlers, this can lead to NULL pointer dereferences. Fix this by adding the appropriate checks. Fixes: 7e14ea15 ("xfrm6: Add IPsec protocol multiplexer") Signed-off-by: Steffen Klassert --- net/ipv6/xfrm6_protocol.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c index 6ab989c486f7..54d13f8dbbae 100644 --- a/net/ipv6/xfrm6_protocol.c +++ b/net/ipv6/xfrm6_protocol.c @@ -50,6 +50,10 @@ int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err) { int ret; struct xfrm6_protocol *handler; + struct xfrm6_protocol __rcu **head = proto_handlers(protocol); + + if (!head) + return 0; for_each_protocol_rcu(*proto_handlers(protocol), handler) if ((ret = handler->cb_handler(skb, err)) <= 0) @@ -184,10 +188,12 @@ int xfrm6_protocol_register(struct xfrm6_protocol *handler, struct xfrm6_protocol __rcu **pprev; struct xfrm6_protocol *t; bool add_netproto = false; - int ret = -EEXIST; int priority = handler->priority; + if (!proto_handlers(protocol) || !netproto(protocol)) + return -EINVAL; + mutex_lock(&xfrm6_protocol_mutex); if (!rcu_dereference_protected(*proto_handlers(protocol), @@ -230,6 +236,9 @@ int xfrm6_protocol_deregister(struct xfrm6_protocol *handler, struct xfrm6_protocol *t; int ret = -ENOENT; + if (!proto_handlers(protocol) || !netproto(protocol)) + return -EINVAL; + mutex_lock(&xfrm6_protocol_mutex); for (pprev = proto_handlers(protocol); From 0fed2bcf17d2a049c079ffd2b7bc4caca4b7f906 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 13 Apr 2014 22:55:27 +0300 Subject: [PATCH 018/178] iwlwifi: mvm: BT Coex - fix validity flags during init The commit below introduced a bug in the validity bits in init. Due to that, all the Coex mechanism stopped sending kills to the BT side. Fix that. Fixes: b9fae2d54c9f ("iwlwifi: mvm: BT Coex add support for Co-running block") Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/coex.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/coex.c b/drivers/net/wireless/iwlwifi/mvm/coex.c index fa858d548d13..0489314425cb 100644 --- a/drivers/net/wireless/iwlwifi/mvm/coex.c +++ b/drivers/net/wireless/iwlwifi/mvm/coex.c @@ -611,14 +611,14 @@ int iwl_send_bt_init_conf(struct iwl_mvm *mvm) bt_cmd->flags |= cpu_to_le32(BT_COEX_SYNC2SCO); if (IWL_MVM_BT_COEX_CORUNNING) { - bt_cmd->valid_bit_msk = cpu_to_le32(BT_VALID_CORUN_LUT_20 | - BT_VALID_CORUN_LUT_40); + bt_cmd->valid_bit_msk |= cpu_to_le32(BT_VALID_CORUN_LUT_20 | + BT_VALID_CORUN_LUT_40); bt_cmd->flags |= cpu_to_le32(BT_COEX_CORUNNING); } if (IWL_MVM_BT_COEX_MPLUT) { bt_cmd->flags |= cpu_to_le32(BT_COEX_MPLUT); - bt_cmd->valid_bit_msk = cpu_to_le32(BT_VALID_MULTI_PRIO_LUT); + bt_cmd->valid_bit_msk |= cpu_to_le32(BT_VALID_MULTI_PRIO_LUT); } if (mvm->cfg->bt_shared_single_ant) From 8e96440e8d42da0dab02cae125ea59e2b64c77fe Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 6 May 2014 20:35:10 +0300 Subject: [PATCH 019/178] iwlwifi: mvm: rs - s/CPTCFG/CONFIG My bad - I forgot to update this when sending the patch upstream. Fixes: 87d5e4155c00 ("iwlwifi: mvm: rs: reinit rs if no tx for a long time") Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/rs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/rs.c b/drivers/net/wireless/iwlwifi/mvm/rs.c index 9f52c5b3f0ec..e1c838899363 100644 --- a/drivers/net/wireless/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/iwlwifi/mvm/rs.c @@ -1010,7 +1010,7 @@ static void rs_tx_status(void *mvm_r, struct ieee80211_supported_band *sband, return; } -#ifdef CPTCFG_MAC80211_DEBUGFS +#ifdef CONFIG_MAC80211_DEBUGFS /* Disable last tx check if we are debugging with fixed rate */ if (lq_sta->dbg_fixed_rate) { IWL_DEBUG_RATE(mvm, "Fixed rate. avoid rate scaling\n"); From bd5e4744a6ca64299b57a2682c720d00a475a734 Mon Sep 17 00:00:00 2001 From: David Spinadel Date: Thu, 24 Apr 2014 13:15:29 +0300 Subject: [PATCH 020/178] iwlwifi: mvm: do no sched scan while associated Currently the FW doesn't support sched scan while associated, Prevent it. Signed-off-by: David Spinadel Reviewed-by: Johannes Berg Reviewed-by: Luciano Coelho Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 5 +++++ drivers/net/wireless/iwlwifi/mvm/mvm.h | 3 +++ drivers/net/wireless/iwlwifi/mvm/utils.c | 19 +++++++++++++++++++ 3 files changed, 27 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index f0cebf12c7b8..593f723a74c4 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -1807,6 +1807,11 @@ static int iwl_mvm_mac_sched_scan_start(struct ieee80211_hw *hw, mutex_lock(&mvm->mutex); + if (iwl_mvm_is_associated(mvm)) { + ret = -EBUSY; + goto out; + } + switch (mvm->scan_status) { case IWL_MVM_SCAN_OS: IWL_DEBUG_SCAN(mvm, "Stopping previous scan for sched_scan\n"); diff --git a/drivers/net/wireless/iwlwifi/mvm/mvm.h b/drivers/net/wireless/iwlwifi/mvm/mvm.h index d564233a65da..84c75a1b267e 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/iwlwifi/mvm/mvm.h @@ -1003,6 +1003,9 @@ static inline bool iwl_mvm_vif_low_latency(struct iwl_mvm_vif *mvmvif) return mvmvif->low_latency; } +/* Assoc status */ +bool iwl_mvm_is_associated(struct iwl_mvm *mvm); + /* Thermal management and CT-kill */ void iwl_mvm_tt_tx_backoff(struct iwl_mvm *mvm, u32 backoff); void iwl_mvm_tt_handler(struct iwl_mvm *mvm); diff --git a/drivers/net/wireless/iwlwifi/mvm/utils.c b/drivers/net/wireless/iwlwifi/mvm/utils.c index d619851745a1..6fdbef9696d8 100644 --- a/drivers/net/wireless/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/iwlwifi/mvm/utils.c @@ -644,3 +644,22 @@ bool iwl_mvm_low_latency(struct iwl_mvm *mvm) return result; } + +static void iwl_mvm_assoc_iter(void *_data, u8 *mac, struct ieee80211_vif *vif) +{ + bool *assoc = _data; + + if (vif->bss_conf.assoc) + *assoc = true; +} + +bool iwl_mvm_is_associated(struct iwl_mvm *mvm) +{ + bool assoc = false; + + ieee80211_iterate_active_interfaces_atomic( + mvm->hw, IEEE80211_IFACE_ITER_NORMAL, + iwl_mvm_assoc_iter, &assoc); + + return assoc; +} From e5c460f46ae7ee94831cb55cb980f942aa9e5a85 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 6 May 2014 21:27:37 -0700 Subject: [PATCH 021/178] sparc64: Don't bark so loudly about 32-bit tasks generating 64-bit fault addresses. This was found using Dave Jone's trinity tool. When a user process which is 32-bit performs a load or a store, the cpu chops off the top 32-bits of the effective address before translating it. This is because we run 32-bit tasks with the PSTATE_AM (address masking) bit set. We can't run the kernel with that bit set, so when the kernel accesses userspace no address masking occurs. Since a 32-bit process will have no mappings in that region we will properly fault, so we don't try to handle this using access_ok(), which can safely just be a NOP on sparc64. Real faults from 32-bit processes should never generate such addresses so a bug check was added long ago, and it barks in the logs if this happens. But it also barks when a kernel user access causes this condition, and that _can_ happen. For example, if a pointer passed into a system call is "0xfffffffc" and the kernel access 4 bytes offset from that pointer. Just handle such faults normally via the exception entries. Signed-off-by: David S. Miller --- arch/sparc/mm/fault_64.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index a8ff0d1a3b69..4ced3fc66130 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -281,18 +281,6 @@ static void noinline __kprobes bogus_32bit_fault_tpc(struct pt_regs *regs) show_regs(regs); } -static void noinline __kprobes bogus_32bit_fault_address(struct pt_regs *regs, - unsigned long addr) -{ - static int times; - - if (times++ < 10) - printk(KERN_ERR "FAULT[%s:%d]: 32-bit process " - "reports 64-bit fault address [%lx]\n", - current->comm, current->pid, addr); - show_regs(regs); -} - asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) { enum ctx_state prev_state = exception_enter(); @@ -322,10 +310,8 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) goto intr_or_no_mm; } } - if (unlikely((address >> 32) != 0)) { - bogus_32bit_fault_address(regs, address); + if (unlikely((address >> 32) != 0)) goto intr_or_no_mm; - } } if (regs->tstate & TSTATE_PRIV) { From 46ce0fe97a6be7532ce6126bb26ce89fed81528c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 2 May 2014 16:56:01 +0200 Subject: [PATCH 022/178] perf: Fix race in removing an event When removing a (sibling) event we do: raw_spin_lock_irq(&ctx->lock); perf_group_detach(event); raw_spin_unlock_irq(&ctx->lock); perf_remove_from_context(event); raw_spin_lock_irq(&ctx->lock); ... raw_spin_unlock_irq(&ctx->lock); Now, assuming the event is a sibling, it will be 'unreachable' for things like ctx_sched_out() because that iterates the groups->siblings, and we just unhooked the sibling. So, if during we get ctx_sched_out(), it will miss the event and not call event_sched_out() on it, leaving it programmed on the PMU. The subsequent perf_remove_from_context() call will find the ctx is inactive and only call list_del_event() to remove the event from all other lists. Hereafter we can proceed to free the event; while still programmed! Close this hole by moving perf_group_detach() inside the same ctx->lock region(s) perf_remove_from_context() has. The condition on inherited events only in __perf_event_exit_task() is likely complete crap because non-inherited events are part of groups too and we're tearing down just the same. But leave that for another patch. Most-likely-Fixes: e03a9a55b4e ("perf: Change close() semantics for group events") Reported-by: Vince Weaver Tested-by: Vince Weaver Much-staring-at-traces-by: Vince Weaver Much-staring-at-traces-by: Thomas Gleixner Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140505093124.GN17778@laptop.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/events/core.c | 47 ++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index f83a71a3e46d..ea899e2b5593 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1443,6 +1443,11 @@ group_sched_out(struct perf_event *group_event, cpuctx->exclusive = 0; } +struct remove_event { + struct perf_event *event; + bool detach_group; +}; + /* * Cross CPU call to remove a performance event * @@ -1451,12 +1456,15 @@ group_sched_out(struct perf_event *group_event, */ static int __perf_remove_from_context(void *info) { - struct perf_event *event = info; + struct remove_event *re = info; + struct perf_event *event = re->event; struct perf_event_context *ctx = event->ctx; struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); raw_spin_lock(&ctx->lock); event_sched_out(event, cpuctx, ctx); + if (re->detach_group) + perf_group_detach(event); list_del_event(event, ctx); if (!ctx->nr_events && cpuctx->task_ctx == ctx) { ctx->is_active = 0; @@ -1481,10 +1489,14 @@ static int __perf_remove_from_context(void *info) * When called from perf_event_exit_task, it's OK because the * context has been detached from its task. */ -static void perf_remove_from_context(struct perf_event *event) +static void perf_remove_from_context(struct perf_event *event, bool detach_group) { struct perf_event_context *ctx = event->ctx; struct task_struct *task = ctx->task; + struct remove_event re = { + .event = event, + .detach_group = detach_group, + }; lockdep_assert_held(&ctx->mutex); @@ -1493,12 +1505,12 @@ static void perf_remove_from_context(struct perf_event *event) * Per cpu events are removed via an smp call and * the removal is always successful. */ - cpu_function_call(event->cpu, __perf_remove_from_context, event); + cpu_function_call(event->cpu, __perf_remove_from_context, &re); return; } retry: - if (!task_function_call(task, __perf_remove_from_context, event)) + if (!task_function_call(task, __perf_remove_from_context, &re)) return; raw_spin_lock_irq(&ctx->lock); @@ -1515,6 +1527,8 @@ retry: * Since the task isn't running, its safe to remove the event, us * holding the ctx->lock ensures the task won't get scheduled in. */ + if (detach_group) + perf_group_detach(event); list_del_event(event, ctx); raw_spin_unlock_irq(&ctx->lock); } @@ -3281,10 +3295,7 @@ int perf_event_release_kernel(struct perf_event *event) * to trigger the AB-BA case. */ mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); - raw_spin_lock_irq(&ctx->lock); - perf_group_detach(event); - raw_spin_unlock_irq(&ctx->lock); - perf_remove_from_context(event); + perf_remove_from_context(event, true); mutex_unlock(&ctx->mutex); free_event(event); @@ -7165,7 +7176,7 @@ SYSCALL_DEFINE5(perf_event_open, struct perf_event_context *gctx = group_leader->ctx; mutex_lock(&gctx->mutex); - perf_remove_from_context(group_leader); + perf_remove_from_context(group_leader, false); /* * Removing from the context ends up with disabled @@ -7175,7 +7186,7 @@ SYSCALL_DEFINE5(perf_event_open, perf_event__state_init(group_leader); list_for_each_entry(sibling, &group_leader->sibling_list, group_entry) { - perf_remove_from_context(sibling); + perf_remove_from_context(sibling, false); perf_event__state_init(sibling); put_ctx(gctx); } @@ -7305,7 +7316,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) mutex_lock(&src_ctx->mutex); list_for_each_entry_safe(event, tmp, &src_ctx->event_list, event_entry) { - perf_remove_from_context(event); + perf_remove_from_context(event, false); unaccount_event_cpu(event, src_cpu); put_ctx(src_ctx); list_add(&event->migrate_entry, &events); @@ -7367,13 +7378,7 @@ __perf_event_exit_task(struct perf_event *child_event, struct perf_event_context *child_ctx, struct task_struct *child) { - if (child_event->parent) { - raw_spin_lock_irq(&child_ctx->lock); - perf_group_detach(child_event); - raw_spin_unlock_irq(&child_ctx->lock); - } - - perf_remove_from_context(child_event); + perf_remove_from_context(child_event, !!child_event->parent); /* * It can happen that the parent exits first, and has events @@ -7857,14 +7862,14 @@ static void perf_pmu_rotate_stop(struct pmu *pmu) static void __perf_event_exit_context(void *__info) { + struct remove_event re = { .detach_group = false }; struct perf_event_context *ctx = __info; - struct perf_event *event; perf_pmu_rotate_stop(ctx->pmu); rcu_read_lock(); - list_for_each_entry_rcu(event, &ctx->event_list, event_entry) - __perf_remove_from_context(event); + list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry) + __perf_remove_from_context(&re); rcu_read_unlock(); } From ffb4ef21ac4308c2e738e6f83b6741bbc9b4fa3b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 5 May 2014 19:12:20 +0200 Subject: [PATCH 023/178] perf: Fix perf_event_init_context() perf_pin_task_context() can return NULL but perf_event_init_context() assumes it will not, correct this. Reported-by: Vince Weaver Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/20140505171428.GU26782@laptop.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index ea899e2b5593..71232844f235 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7729,6 +7729,8 @@ int perf_event_init_context(struct task_struct *child, int ctxn) * swapped under us. */ parent_ctx = perf_pin_task_context(parent, ctxn); + if (!parent_ctx) + return 0; /* * No need to check if parent_ctx != NULL here; since we saw From a4b4f11b2783ec678cccb4ce7e4ce9665aa04a24 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 29 Apr 2014 15:04:41 +0800 Subject: [PATCH 024/178] perf/x86/intel: Fix Silvermont's event constraints Event 0x013c is not the same as fixed counter2, remove it from Silvermont's event constraints. Signed-off-by: Yan, Zheng Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1398755081-12471-1-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index aa333d966886..adb02aa62af5 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -169,7 +169,6 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ - FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */ EVENT_CONSTRAINT_END }; From 2d513868e2a33e1d5315490ef4c861ee65babd65 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 May 2014 23:26:24 +0200 Subject: [PATCH 025/178] sched: Sanitize irq accounting madness Russell reported, that irqtime_account_idle_ticks() takes ages due to: for (i = 0; i < ticks; i++) irqtime_account_process_tick(current, 0, rq); It's sad, that this code was written way _AFTER_ the NOHZ idle functionality was available. I charge myself guitly for not paying attention when that crap got merged with commit abb74cefa ("sched: Export ns irqtimes through /proc/stat") So instead of looping nr_ticks times just apply the whole thing at once. As a side note: The whole cputime_t vs. u64 business in that context wants to be cleaned up as well. There is no point in having all these back and forth conversions. Lets standardise on u64 nsec for all kernel internal accounting and be done with it. Everything else does not make sense at all for fine grained accounting. Frederic, can you please take care of that? Reported-by: Russell King Signed-off-by: Thomas Gleixner Reviewed-by: Paul E. McKenney Signed-off-by: Peter Zijlstra Cc: Venkatesh Pallipadi Cc: Shaun Ruffell Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1405022307000.6261@ionos.tec.linutronix.de Signed-off-by: Ingo Molnar --- kernel/sched/cputime.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index a95097cb4591..72fdf06ef865 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -332,50 +332,50 @@ out: * softirq as those do not count in task exec_runtime any more. */ static void irqtime_account_process_tick(struct task_struct *p, int user_tick, - struct rq *rq) + struct rq *rq, int ticks) { - cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); + cputime_t scaled = cputime_to_scaled(cputime_one_jiffy); + u64 cputime = (__force u64) cputime_one_jiffy; u64 *cpustat = kcpustat_this_cpu->cpustat; if (steal_account_process_tick()) return; + cputime *= ticks; + scaled *= ticks; + if (irqtime_account_hi_update()) { - cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy; + cpustat[CPUTIME_IRQ] += cputime; } else if (irqtime_account_si_update()) { - cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy; + cpustat[CPUTIME_SOFTIRQ] += cputime; } else if (this_cpu_ksoftirqd() == p) { /* * ksoftirqd time do not get accounted in cpu_softirq_time. * So, we have to handle it separately here. * Also, p->stime needs to be updated for ksoftirqd. */ - __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, - CPUTIME_SOFTIRQ); + __account_system_time(p, cputime, scaled, CPUTIME_SOFTIRQ); } else if (user_tick) { - account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); + account_user_time(p, cputime, scaled); } else if (p == rq->idle) { - account_idle_time(cputime_one_jiffy); + account_idle_time(cputime); } else if (p->flags & PF_VCPU) { /* System time or guest time */ - account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled); + account_guest_time(p, cputime, scaled); } else { - __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, - CPUTIME_SYSTEM); + __account_system_time(p, cputime, scaled, CPUTIME_SYSTEM); } } static void irqtime_account_idle_ticks(int ticks) { - int i; struct rq *rq = this_rq(); - for (i = 0; i < ticks; i++) - irqtime_account_process_tick(current, 0, rq); + irqtime_account_process_tick(current, 0, rq, ticks); } #else /* CONFIG_IRQ_TIME_ACCOUNTING */ static inline void irqtime_account_idle_ticks(int ticks) {} static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick, - struct rq *rq) {} + struct rq *rq, int nr_ticks) {} #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ /* @@ -464,7 +464,7 @@ void account_process_tick(struct task_struct *p, int user_tick) return; if (sched_clock_irqtime) { - irqtime_account_process_tick(p, user_tick, rq); + irqtime_account_process_tick(p, user_tick, rq, 1); return; } From 5bfd126e80dca70431aef8fdbc1cf14535f3c338 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Tue, 15 Apr 2014 13:49:04 +0200 Subject: [PATCH 026/178] sched/deadline: Fix sched_yield() behavior yield_task_dl() is broken: o it forces current to be throttled setting its runtime to zero; o it sets current's dl_se->dl_new to one, expecting that dl_task_timer() will queue it back with proper parameters at replenish time. Unfortunately, dl_task_timer() has this check at the very beginning: if (!dl_task(p) || dl_se->dl_new) goto unlock; So, it just bails out and the task is never replenished. It actually yielded forever. To fix this, introduce a new flag indicating that the task properly yielded the CPU before its current runtime expired. While this is a little overdoing at the moment, the flag would be useful in the future to discriminate between "good" jobs (of which remaining runtime could be reclaimed, i.e. recycled) and "bad" jobs (for which dl_throttled task has been set) that needed to be stopped. Reported-by: yjay.kim Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140429103953.e68eba1b2ac3309214e3dc5a@gmail.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 7 +++++-- kernel/sched/core.c | 1 + kernel/sched/deadline.c | 5 +++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 25f54c79f757..2a4298fb0d85 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1153,9 +1153,12 @@ struct sched_dl_entity { * * @dl_boosted tells if we are boosted due to DI. If so we are * outside bandwidth enforcement mechanism (but only until we - * exit the critical section). + * exit the critical section); + * + * @dl_yielded tells if task gave up the cpu before consuming + * all its available runtime during the last job. */ - int dl_throttled, dl_new, dl_boosted; + int dl_throttled, dl_new, dl_boosted, dl_yielded; /* * Bandwidth enforcement timer. Each -deadline task has its diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9fe2190005cb..e62c65a12d5b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3124,6 +3124,7 @@ __setparam_dl(struct task_struct *p, const struct sched_attr *attr) dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime); dl_se->dl_throttled = 0; dl_se->dl_new = 1; + dl_se->dl_yielded = 0; } static void __setscheduler_params(struct task_struct *p, diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index b08095786cb8..800e99b99075 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -528,6 +528,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) sched_clock_tick(); update_rq_clock(rq); dl_se->dl_throttled = 0; + dl_se->dl_yielded = 0; if (p->on_rq) { enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); if (task_has_dl_policy(rq->curr)) @@ -893,10 +894,10 @@ static void yield_task_dl(struct rq *rq) * We make the task go to sleep until its current deadline by * forcing its runtime to zero. This way, update_curr_dl() stops * it and the bandwidth timer will wake it up and will give it - * new scheduling parameters (thanks to dl_new=1). + * new scheduling parameters (thanks to dl_yielded=1). */ if (p->dl.runtime > 0) { - rq->curr->dl.dl_new = 1; + rq->curr->dl.dl_yielded = 1; p->dl.runtime = 0; } update_curr_dl(rq); From 6a7cd273dc4bc3246f37ebe874754a54ccb29141 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 17 Apr 2014 10:05:02 +0800 Subject: [PATCH 027/178] sched/deadline: Fix memory leak Free cpudl->free_cpus allocated in cpudl_init(). Signed-off-by: Li Zefan Acked-by: Juri Lelli Signed-off-by: Peter Zijlstra Cc: # 3.14+ Link: http://lkml.kernel.org/r/534F36CE.2000409@huawei.com Signed-off-by: Ingo Molnar --- kernel/sched/cpudeadline.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c index 5b9bb42b2d47..ab001b5d5048 100644 --- a/kernel/sched/cpudeadline.c +++ b/kernel/sched/cpudeadline.c @@ -210,7 +210,5 @@ int cpudl_init(struct cpudl *cp) */ void cpudl_cleanup(struct cpudl *cp) { - /* - * nothing to do for the moment - */ + free_cpumask_var(cp->free_cpus); } From 6227cb00cc120f9a43ce8313bb0475ddabcb7d01 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Sun, 13 Apr 2014 09:34:53 -0400 Subject: [PATCH 028/178] sched: Use CPUPRI_NR_PRIORITIES instead of MAX_RT_PRIO in cpupri check The check at the beginning of cpupri_find() makes sure that the task_pri variable does not exceed the cp->pri_to_cpu array length. But that length is CPUPRI_NR_PRIORITIES not MAX_RT_PRIO, where it will miss the last two priorities in that array. As task_pri is computed from convert_prio() which should never be bigger than CPUPRI_NR_PRIORITIES, if the check should cause a panic if it is hit. Reported-by: Mike Galbraith Signed-off-by: Steven Rostedt Signed-off-by: Peter Zijlstra Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1397015410.5212.13.camel@marge.simpson.net Signed-off-by: Ingo Molnar --- kernel/sched/cpupri.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c index 8b836b376d91..3031bac8aa3e 100644 --- a/kernel/sched/cpupri.c +++ b/kernel/sched/cpupri.c @@ -70,8 +70,7 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, int idx = 0; int task_pri = convert_prio(p->prio); - if (task_pri >= MAX_RT_PRIO) - return 0; + BUG_ON(task_pri >= CPUPRI_NR_PRIORITIES); for (idx = 0; idx < task_pri; idx++) { struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; From 6ccdc84b81a0a6c09a7f0427761d2f8cecfc2218 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 24 Apr 2014 12:00:47 +0200 Subject: [PATCH 029/178] sched: Skip double execution of pick_next_task_fair() Tim wrote: "The current code will call pick_next_task_fair a second time in the slow path if we did not pull any task in our first try. This is really unnecessary as we already know no task can be pulled and it doubles the delay for the cpu to enter idle. We instrumented some network workloads and that saw that pick_next_task_fair is frequently called twice before a cpu enters idle. The call to pick_next_task_fair can add non trivial latency as it calls load_balance which runs find_busiest_group on an hierarchy of sched domains spanning the cpus for a large system. For some 4 socket systems, we saw almost 0.25 msec spent per call of pick_next_task_fair before a cpu can be idled." Optimize the second call away for the common case and document the dependency. Reported-by: Tim Chen Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Len Brown Link: http://lkml.kernel.org/r/20140424100047.GP11096@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e62c65a12d5b..28921ec91b3d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2592,8 +2592,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev) if (likely(prev->sched_class == class && rq->nr_running == rq->cfs.h_nr_running)) { p = fair_sched_class.pick_next_task(rq, prev); - if (likely(p && p != RETRY_TASK)) - return p; + if (unlikely(p == RETRY_TASK)) + goto again; + + /* assumes fair_sched_class->next == idle_sched_class */ + if (unlikely(!p)) + p = idle_sched_class.pick_next_task(rq, prev); + + return p; } again: From 0e5b5337f0da073e1f17aec3c322ea7826975d0d Mon Sep 17 00:00:00 2001 From: Jason Low Date: Mon, 28 Apr 2014 15:45:54 -0700 Subject: [PATCH 030/178] sched: Fix updating rq->max_idle_balance_cost and rq->next_balance in idle_balance() The following commit: e5fc66119ec9 ("sched: Fix race in idle_balance()") can potentially cause rq->max_idle_balance_cost to not be updated, even when load_balance(NEWLY_IDLE) is attempted and the per-sd max cost value is updated. Preeti noticed a similar issue with updating rq->next_balance. In this patch, we fix this by making sure we still check/update those values even if a task gets enqueued while browsing the domains. Signed-off-by: Jason Low Reviewed-by: Preeti U Murthy Signed-off-by: Peter Zijlstra Cc: morten.rasmussen@arm.com Cc: aswin@hp.com Cc: daniel.lezcano@linaro.org Cc: alex.shi@linaro.org Cc: efault@gmx.de Cc: vincent.guittot@linaro.org Link: http://lkml.kernel.org/r/1398725155-7591-2-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7570dd969c28..0fdb96de81a5 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6653,6 +6653,7 @@ static int idle_balance(struct rq *this_rq) int this_cpu = this_rq->cpu; idle_enter_fair(this_rq); + /* * We must set idle_stamp _before_ calling idle_balance(), such that we * measure the duration of idle_balance() as idle time. @@ -6705,14 +6706,16 @@ static int idle_balance(struct rq *this_rq) raw_spin_lock(&this_rq->lock); + if (curr_cost > this_rq->max_idle_balance_cost) + this_rq->max_idle_balance_cost = curr_cost; + /* - * While browsing the domains, we released the rq lock. - * A task could have be enqueued in the meantime + * While browsing the domains, we released the rq lock, a task could + * have been enqueued in the meantime. Since we're not going idle, + * pretend we pulled a task. */ - if (this_rq->cfs.h_nr_running && !pulled_task) { + if (this_rq->cfs.h_nr_running && !pulled_task) pulled_task = 1; - goto out; - } if (pulled_task || time_after(jiffies, this_rq->next_balance)) { /* @@ -6722,9 +6725,6 @@ static int idle_balance(struct rq *this_rq) this_rq->next_balance = next_balance; } - if (curr_cost > this_rq->max_idle_balance_cost) - this_rq->max_idle_balance_cost = curr_cost; - out: /* Is there a task of a high priority class? */ if (this_rq->nr_running != this_rq->cfs.h_nr_running && From 2b4cfe64dee0d84506b951d81bf55d9891744d25 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Wed, 23 Apr 2014 18:30:34 -0700 Subject: [PATCH 031/178] sched/numa: Initialize newidle balance stats in sd_numa_init() Also initialize the per-sd variables for newidle load balancing in sd_numa_init(). Signed-off-by: Jason Low Acked-by: morten.rasmussen@arm.com Cc: daniel.lezcano@linaro.org Cc: alex.shi@linaro.org Cc: preeti@linux.vnet.ibm.com Cc: efault@gmx.de Cc: vincent.guittot@linaro.org Cc: aswin@hp.com Cc: chegu_vinod@hp.com Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1398303035-18255-3-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 28921ec91b3d..13584f1cccfc 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6026,6 +6026,8 @@ sd_numa_init(struct sched_domain_topology_level *tl, int cpu) , .last_balance = jiffies, .balance_interval = sd_weight, + .max_newidle_lb_cost = 0, + .next_decay_max_lb_cost = jiffies, }; SD_INIT_NAME(sd, NUMA); sd->private = &tl->data; From 4f4178f3bb1f470d7fb863ec531e08e20a0fd51c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Sat, 3 May 2014 16:12:47 +0200 Subject: [PATCH 032/178] net: cdc_mbim: __vlan_find_dev_deep need rcu_read_lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes this warning introduced by commit 5b8f15f78e6f ("net: cdc_mbim: handle IPv6 Neigbor Solicitations"): =============================== [ INFO: suspicious RCU usage. ] 3.15.0-rc3 #213 Tainted: G W O ------------------------------- net/8021q/vlan_core.c:69 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 1 no locks held by ksoftirqd/0/3. stack backtrace: CPU: 0 PID: 3 Comm: ksoftirqd/0 Tainted: G W O 3.15.0-rc3 #213 Hardware name: LENOVO 2776LEG/2776LEG, BIOS 6EET55WW (3.15 ) 12/19/2011 0000000000000001 ffff880232533bf0 ffffffff813a5ee6 0000000000000006 ffff880232530090 ffff880232533c20 ffffffff81076b94 0000000000000081 0000000000000000 ffff8802085ac000 ffff88007fc8ea00 ffff880232533c50 Call Trace: [] dump_stack+0x4e/0x68 [] lockdep_rcu_suspicious+0xfa/0x103 [] __vlan_find_dev_deep+0x54/0x94 [] cdc_mbim_rx_fixup+0x379/0x66a [cdc_mbim] [] ? _raw_spin_unlock_irqrestore+0x3a/0x49 [] ? trace_hardirqs_on_caller+0x192/0x1a1 [] usbnet_bh+0x59/0x287 [usbnet] [] tasklet_action+0xbb/0xcd [] __do_softirq+0x14c/0x30d [] run_ksoftirqd+0x1f/0x50 [] smpboot_thread_fn+0x172/0x18e [] ? SyS_setgroups+0xdf/0xdf [] kthread+0xb5/0xbd [] ? __wait_for_common+0x13b/0x170 [] ? __kthread_parkme+0x5c/0x5c [] ret_from_fork+0x7c/0xb0 [] ? __kthread_parkme+0x5c/0x5c Fixes: 5b8f15f78e6f ("net: cdc_mbim: handle IPv6 Neigbor Solicitations") Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_mbim.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index c9f3281506af..13f7705fd679 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -204,17 +204,23 @@ static void do_neigh_solicit(struct usbnet *dev, u8 *buf, u16 tci) return; /* need to send the NA on the VLAN dev, if any */ - if (tci) + rcu_read_lock(); + if (tci) { netdev = __vlan_find_dev_deep(dev->net, htons(ETH_P_8021Q), tci); - else + if (!netdev) { + rcu_read_unlock(); + return; + } + } else { netdev = dev->net; - if (!netdev) - return; + } + dev_hold(netdev); + rcu_read_unlock(); in6_dev = in6_dev_get(netdev); if (!in6_dev) - return; + goto out; is_router = !!in6_dev->cnf.forwarding; in6_dev_put(in6_dev); @@ -224,6 +230,8 @@ static void do_neigh_solicit(struct usbnet *dev, u8 *buf, u16 tci) true /* solicited */, false /* override */, true /* inc_opt */); +out: + dev_put(netdev); } static bool is_neigh_solicit(u8 *buf, size_t len) From ca6c5d4ad216d5942ae544bbf02503041bd802aa Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 4 May 2014 23:24:31 +0200 Subject: [PATCH 033/178] net: ipv4: ip_forward: fix inverted local_df test local_df means 'ignore DF bit if set', so if its set we're allowed to perform ip fragmentation. This wasn't noticed earlier because the output path also drops such skbs (and emits needed icmp error) and because netfilter ip defrag did not set local_df until couple of days ago. Only difference is that DF-packets-larger-than MTU now discarded earlier (f.e. we avoid pointless netfilter postrouting trip). While at it, drop the repeated test ip_exceeds_mtu, checking it once is enough... Fixes: fe6cc55f3a9 ("net: ip, ipv6: handle gso skbs in forwarding path") Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/ip_forward.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index be8abe73bb9f..c29ae8371e44 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -42,12 +42,12 @@ static bool ip_may_fragment(const struct sk_buff *skb) { return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) || - !skb->local_df; + skb->local_df; } static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) { - if (skb->len <= mtu || skb->local_df) + if (skb->len <= mtu) return false; if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) From 418a31561d594a2b636c1e2fa94ecd9e1245abb1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 5 May 2014 00:03:34 +0200 Subject: [PATCH 034/178] net: ipv6: send pkttoobig immediately if orig frag size > mtu If conntrack defragments incoming ipv6 frags it stores largest original frag size in ip6cb and sets ->local_df. We must thus first test the largest original frag size vs. mtu, and not vice versa. Without this patch PKTTOOBIG is still generated in ip6_fragment() later in the stack, but 1) IPSTATS_MIB_INTOOBIGERRORS won't increment 2) packet did (needlessly) traverse netfilter postrouting hook. Fixes: fe6cc55f3a9 ("net: ip, ipv6: handle gso skbs in forwarding path") Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 40e7581374f7..31a38bde69ef 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -344,12 +344,16 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) { - if (skb->len <= mtu || skb->local_df) + if (skb->len <= mtu) return false; + /* ipv6 conntrack defrag sets max_frag_size + local_df */ if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu) return true; + if (skb->local_df) + return false; + if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) return false; From c7ba65d7b64984ff371cb5630b36af23506c50d5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 5 May 2014 15:00:43 +0200 Subject: [PATCH 035/178] net: ip: push gso skb forwarding handling down the stack Doing the segmentation in the forward path has one major drawback: When using virtio, we may process gso udp packets coming from host network stack. In that case, netfilter POSTROUTING will see one packet with udp header followed by multiple ip fragments. Delay the segmentation and do it after POSTROUTING invocation to avoid this. Fixes: fe6cc55f3a9 ("net: ip, ipv6: handle gso skbs in forwarding path") Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/ip_forward.c | 50 ------------------------------------------ net/ipv4/ip_output.c | 51 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 53 deletions(-) diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index c29ae8371e44..6f111e48e11c 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -56,53 +56,6 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) return true; } -static bool ip_gso_exceeds_dst_mtu(const struct sk_buff *skb) -{ - unsigned int mtu; - - if (skb->local_df || !skb_is_gso(skb)) - return false; - - mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true); - - /* if seglen > mtu, do software segmentation for IP fragmentation on - * output. DF bit cannot be set since ip_forward would have sent - * icmp error. - */ - return skb_gso_network_seglen(skb) > mtu; -} - -/* called if GSO skb needs to be fragmented on forward */ -static int ip_forward_finish_gso(struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - netdev_features_t features; - struct sk_buff *segs; - int ret = 0; - - features = netif_skb_dev_features(skb, dst->dev); - segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); - if (IS_ERR(segs)) { - kfree_skb(skb); - return -ENOMEM; - } - - consume_skb(skb); - - do { - struct sk_buff *nskb = segs->next; - int err; - - segs->next = NULL; - err = dst_output(segs); - - if (err && ret == 0) - ret = err; - segs = nskb; - } while (segs); - - return ret; -} static int ip_forward_finish(struct sk_buff *skb) { @@ -114,9 +67,6 @@ static int ip_forward_finish(struct sk_buff *skb) if (unlikely(opt->optlen)) ip_forward_options(skb); - if (ip_gso_exceeds_dst_mtu(skb)) - return ip_forward_finish_gso(skb); - return dst_output(skb); } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 1cbeba5edff9..a52f50187b54 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -211,6 +211,48 @@ static inline int ip_finish_output2(struct sk_buff *skb) return -EINVAL; } +static int ip_finish_output_gso(struct sk_buff *skb) +{ + netdev_features_t features; + struct sk_buff *segs; + int ret = 0; + + /* common case: locally created skb or seglen is <= mtu */ + if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) || + skb_gso_network_seglen(skb) <= ip_skb_dst_mtu(skb)) + return ip_finish_output2(skb); + + /* Slowpath - GSO segment length is exceeding the dst MTU. + * + * This can happen in two cases: + * 1) TCP GRO packet, DF bit not set + * 2) skb arrived via virtio-net, we thus get TSO/GSO skbs directly + * from host network stack. + */ + features = netif_skb_features(skb); + segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); + if (IS_ERR(segs)) { + kfree_skb(skb); + return -ENOMEM; + } + + consume_skb(skb); + + do { + struct sk_buff *nskb = segs->next; + int err; + + segs->next = NULL; + err = ip_fragment(segs, ip_finish_output2); + + if (err && ret == 0) + ret = err; + segs = nskb; + } while (segs); + + return ret; +} + static int ip_finish_output(struct sk_buff *skb) { #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) @@ -220,10 +262,13 @@ static int ip_finish_output(struct sk_buff *skb) return dst_output(skb); } #endif - if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb)) + if (skb_is_gso(skb)) + return ip_finish_output_gso(skb); + + if (skb->len > ip_skb_dst_mtu(skb)) return ip_fragment(skb, ip_finish_output2); - else - return ip_finish_output2(skb); + + return ip_finish_output2(skb); } int ip_mc_output(struct sock *sk, struct sk_buff *skb) From c1e756bfcbcac838a86a23f3e4501b556a961e3c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 5 May 2014 15:00:44 +0200 Subject: [PATCH 036/178] Revert "net: core: introduce netif_skb_dev_features" This reverts commit d206940319c41df4299db75ed56142177bb2e5f6, there are no more callers. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +------ net/core/dev.c | 22 ++++++++++------------ 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7ed3a3aa6604..20e99efb1ca6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3180,12 +3180,7 @@ void netdev_change_features(struct net_device *dev); void netif_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev); -netdev_features_t netif_skb_dev_features(struct sk_buff *skb, - const struct net_device *dev); -static inline netdev_features_t netif_skb_features(struct sk_buff *skb) -{ - return netif_skb_dev_features(skb, skb->dev); -} +netdev_features_t netif_skb_features(struct sk_buff *skb); static inline bool net_gso_ok(netdev_features_t features, int gso_type) { diff --git a/net/core/dev.c b/net/core/dev.c index d2c8a06b3a98..c619b8641337 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2418,7 +2418,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault); * 2. No high memory really exists on this machine. */ -static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb) +static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) { #ifdef CONFIG_HIGHMEM int i; @@ -2493,38 +2493,36 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) } static netdev_features_t harmonize_features(struct sk_buff *skb, - const struct net_device *dev, - netdev_features_t features) + netdev_features_t features) { int tmp; if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, skb_network_protocol(skb, &tmp))) { features &= ~NETIF_F_ALL_CSUM; - } else if (illegal_highdma(dev, skb)) { + } else if (illegal_highdma(skb->dev, skb)) { features &= ~NETIF_F_SG; } return features; } -netdev_features_t netif_skb_dev_features(struct sk_buff *skb, - const struct net_device *dev) +netdev_features_t netif_skb_features(struct sk_buff *skb) { __be16 protocol = skb->protocol; - netdev_features_t features = dev->features; + netdev_features_t features = skb->dev->features; - if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs) + if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) features &= ~NETIF_F_GSO_MASK; if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; } else if (!vlan_tx_tag_present(skb)) { - return harmonize_features(skb, dev, features); + return harmonize_features(skb, features); } - features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | + features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) @@ -2532,9 +2530,9 @@ netdev_features_t netif_skb_dev_features(struct sk_buff *skb, NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; - return harmonize_features(skb, dev, features); + return harmonize_features(skb, features); } -EXPORT_SYMBOL(netif_skb_dev_features); +EXPORT_SYMBOL(netif_skb_features); int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) From 76a691d0ab71a244f7582a5b0387728befbdb52f Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 5 May 2014 14:51:47 -0400 Subject: [PATCH 037/178] jme: Fix DMA unmap warning The jme driver forgot to check the return status from pci_map_page in its tx path, causing a dma api warning on unmap. Easy fix, just do the check and augment the tx path to tell the stack that the driver is busy so we re-queue the frame. Signed-off-by: Neil Horman CC: Guo-Fu Tseng CC: "David S. Miller" Signed-off-by: David S. Miller --- drivers/net/ethernet/jme.c | 53 +++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index b0c6050479eb..6e664d9038d6 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -1988,7 +1988,7 @@ jme_alloc_txdesc(struct jme_adapter *jme, return idx; } -static void +static int jme_fill_tx_map(struct pci_dev *pdev, struct txdesc *txdesc, struct jme_buffer_info *txbi, @@ -2005,6 +2005,9 @@ jme_fill_tx_map(struct pci_dev *pdev, len, PCI_DMA_TODEVICE); + if (unlikely(pci_dma_mapping_error(pdev, dmaaddr))) + return -EINVAL; + pci_dma_sync_single_for_device(pdev, dmaaddr, len, @@ -2021,9 +2024,30 @@ jme_fill_tx_map(struct pci_dev *pdev, txbi->mapping = dmaaddr; txbi->len = len; + return 0; } -static void +static void jme_drop_tx_map(struct jme_adapter *jme, int startidx, int endidx) +{ + struct jme_ring *txring = &(jme->txring[0]); + struct jme_buffer_info *txbi = txring->bufinf, *ctxbi; + int mask = jme->tx_ring_mask; + int j; + + for (j = startidx ; j < endidx ; ++j) { + ctxbi = txbi + ((startidx + j + 2) & (mask)); + pci_unmap_page(jme->pdev, + ctxbi->mapping, + ctxbi->len, + PCI_DMA_TODEVICE); + + ctxbi->mapping = 0; + ctxbi->len = 0; + } + +} + +static int jme_map_tx_skb(struct jme_adapter *jme, struct sk_buff *skb, int idx) { struct jme_ring *txring = &(jme->txring[0]); @@ -2034,25 +2058,37 @@ jme_map_tx_skb(struct jme_adapter *jme, struct sk_buff *skb, int idx) int mask = jme->tx_ring_mask; const struct skb_frag_struct *frag; u32 len; + int ret = 0; for (i = 0 ; i < nr_frags ; ++i) { frag = &skb_shinfo(skb)->frags[i]; ctxdesc = txdesc + ((idx + i + 2) & (mask)); ctxbi = txbi + ((idx + i + 2) & (mask)); - jme_fill_tx_map(jme->pdev, ctxdesc, ctxbi, + ret = jme_fill_tx_map(jme->pdev, ctxdesc, ctxbi, skb_frag_page(frag), frag->page_offset, skb_frag_size(frag), hidma); + if (ret) { + jme_drop_tx_map(jme, idx, idx+i); + goto out; + } + } len = skb_is_nonlinear(skb) ? skb_headlen(skb) : skb->len; ctxdesc = txdesc + ((idx + 1) & (mask)); ctxbi = txbi + ((idx + 1) & (mask)); - jme_fill_tx_map(jme->pdev, ctxdesc, ctxbi, virt_to_page(skb->data), + ret = jme_fill_tx_map(jme->pdev, ctxdesc, ctxbi, virt_to_page(skb->data), offset_in_page(skb->data), len, hidma); + if (ret) + jme_drop_tx_map(jme, idx, idx+i); + +out: + return ret; } + static int jme_tx_tso(struct sk_buff *skb, __le16 *mss, u8 *flags) { @@ -2131,6 +2167,7 @@ jme_fill_tx_desc(struct jme_adapter *jme, struct sk_buff *skb, int idx) struct txdesc *txdesc; struct jme_buffer_info *txbi; u8 flags; + int ret = 0; txdesc = (struct txdesc *)txring->desc + idx; txbi = txring->bufinf + idx; @@ -2155,7 +2192,10 @@ jme_fill_tx_desc(struct jme_adapter *jme, struct sk_buff *skb, int idx) if (jme_tx_tso(skb, &txdesc->desc1.mss, &flags)) jme_tx_csum(jme, skb, &flags); jme_tx_vlan(skb, &txdesc->desc1.vlan, &flags); - jme_map_tx_skb(jme, skb, idx); + ret = jme_map_tx_skb(jme, skb, idx); + if (ret) + return ret; + txdesc->desc1.flags = flags; /* * Set tx buffer info after telling NIC to send @@ -2228,7 +2268,8 @@ jme_start_xmit(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_BUSY; } - jme_fill_tx_desc(jme, skb, idx); + if (jme_fill_tx_desc(jme, skb, idx)) + return NETDEV_TX_BUSY; jwrite32(jme, JME_TXCS, jme->reg_txcs | TXCS_SELECT_QUEUE0 | From 83f7a85f1134c6e914453f5747435415a23d516b Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 13 Apr 2014 16:03:11 +0300 Subject: [PATCH 038/178] iwlwifi: pcie: disable interrupts upon PCIe alloc In case RFKILL is in KILL position, the NIC will issue an interrupt straight away. This interrupt won't be sent because it is masked in the hardware. But if our interrupt service routine is called for another reason (SHARED_IRQ), then we'll look at the interrupt cause and service it. This can cause bad things if we are not ready yet. Explicitly clean the interrupt cause register to make sure we won't service anything before we are ready to. Cc: [3.14] Reported-and-tested-by: Alexander Monakov Reviewed-by: Johannes Berg Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/pcie/trans.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index dcfd6d866d09..2365553f1ef7 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -1749,6 +1749,10 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, * PCI Tx retries from interfering with C3 CPU state */ pci_write_config_byte(pdev, PCI_CFG_RETRY_TIMEOUT, 0x00); + trans->dev = &pdev->dev; + trans_pcie->pci_dev = pdev; + iwl_disable_interrupts(trans); + err = pci_enable_msi(pdev); if (err) { dev_err(&pdev->dev, "pci_enable_msi failed(0X%x)\n", err); @@ -1760,8 +1764,6 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, } } - trans->dev = &pdev->dev; - trans_pcie->pci_dev = pdev; trans->hw_rev = iwl_read32(trans, CSR_HW_REV); trans->hw_id = (pdev->device << 16) + pdev->subsystem_device; snprintf(trans->hw_id_str, sizeof(trans->hw_id_str), @@ -1787,8 +1789,6 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, goto out_pci_disable_msi; } - trans_pcie->inta_mask = CSR_INI_SET_MASK; - if (iwl_pcie_alloc_ict(trans)) goto out_free_cmd_pool; @@ -1800,6 +1800,8 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, goto out_free_ict; } + trans_pcie->inta_mask = CSR_INI_SET_MASK; + return trans; out_free_ict: From d5d2bf3eabb34cc8eaf54db37fdc43f04267985a Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Tue, 6 May 2014 03:46:48 -0400 Subject: [PATCH 039/178] qlcnic: Fix panic while dumping TX queues on TX timeout o In case of non-multi TX queue mode driver does not initialize "crb_intr_mask" pointer and driver was accessing that un-initialized pointer while dumping TX queue. So dump "crb_intr_mask" only when it is initilaized. Signed-off-by: Manish Chopra Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 0bc914859e38..be789513774c 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -2943,9 +2943,13 @@ static void qlcnic_dump_tx_rings(struct qlcnic_adapter *adapter) tx_ring->tx_stats.xmit_called, tx_ring->tx_stats.xmit_on, tx_ring->tx_stats.xmit_off); + + if (tx_ring->crb_intr_mask) + netdev_info(netdev, "crb_intr_mask=%d\n", + readl(tx_ring->crb_intr_mask)); + netdev_info(netdev, - "crb_intr_mask=%d, hw_producer=%d, sw_producer=%d sw_consumer=%d, hw_consumer=%d\n", - readl(tx_ring->crb_intr_mask), + "hw_producer=%d, sw_producer=%d sw_consumer=%d, hw_consumer=%d\n", readl(tx_ring->crb_cmd_producer), tx_ring->producer, tx_ring->sw_consumer, le32_to_cpu(*(tx_ring->hw_consumer))); From 84d7ad2c3b8a80888d9a483388ccbd5e5f07438f Mon Sep 17 00:00:00 2001 From: Shahed Shaikh Date: Tue, 6 May 2014 03:46:49 -0400 Subject: [PATCH 040/178] qlcnic: Set real_num_{tx|rx}_queues properly Do not set netdev->real_num_tx_queues directly, let netif_set_real_num_tx_queues() take care of it. Do not overwrite netdev->num_tx_queues everytime when driver changes its Tx ring size through ethtool -L and also notify stack to update number of Rx queues. Signed-off-by: Shahed Shaikh Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic.h | 16 ------ .../net/ethernet/qlogic/qlcnic/qlcnic_main.c | 49 ++++++++++++++++++- 2 files changed, 47 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index 7b52a88923ef..f785d01c7d12 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -1719,22 +1719,6 @@ static inline u32 qlcnic_tx_avail(struct qlcnic_host_tx_ring *tx_ring) tx_ring->producer; } -static inline int qlcnic_set_real_num_queues(struct qlcnic_adapter *adapter, - struct net_device *netdev) -{ - int err; - - netdev->num_tx_queues = adapter->drv_tx_rings; - netdev->real_num_tx_queues = adapter->drv_tx_rings; - - err = netif_set_real_num_tx_queues(netdev, adapter->drv_tx_rings); - if (err) - netdev_err(netdev, "failed to set %d Tx queues\n", - adapter->drv_tx_rings); - - return err; -} - struct qlcnic_nic_template { int (*config_bridged_mode) (struct qlcnic_adapter *, u32); int (*config_led) (struct qlcnic_adapter *, u32, u32); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index be789513774c..7e55e88a81bf 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -2206,6 +2206,31 @@ static void qlcnic_82xx_set_mac_filter_count(struct qlcnic_adapter *adapter) ahw->max_uc_count = count; } +static int qlcnic_set_real_num_queues(struct qlcnic_adapter *adapter, + u8 tx_queues, u8 rx_queues) +{ + struct net_device *netdev = adapter->netdev; + int err = 0; + + if (tx_queues) { + err = netif_set_real_num_tx_queues(netdev, tx_queues); + if (err) { + netdev_err(netdev, "failed to set %d Tx queues\n", + tx_queues); + return err; + } + } + + if (rx_queues) { + err = netif_set_real_num_rx_queues(netdev, rx_queues); + if (err) + netdev_err(netdev, "failed to set %d Rx queues\n", + rx_queues); + } + + return err; +} + int qlcnic_setup_netdev(struct qlcnic_adapter *adapter, struct net_device *netdev, int pci_using_dac) @@ -2269,7 +2294,8 @@ qlcnic_setup_netdev(struct qlcnic_adapter *adapter, struct net_device *netdev, netdev->priv_flags |= IFF_UNICAST_FLT; netdev->irq = adapter->msix_entries[0].vector; - err = qlcnic_set_real_num_queues(adapter, netdev); + err = qlcnic_set_real_num_queues(adapter, adapter->drv_tx_rings, + adapter->drv_sds_rings); if (err) return err; @@ -3982,12 +4008,21 @@ int qlcnic_validate_rings(struct qlcnic_adapter *adapter, __u32 ring_cnt, int qlcnic_setup_rings(struct qlcnic_adapter *adapter) { struct net_device *netdev = adapter->netdev; + u8 tx_rings, rx_rings; int err; if (test_bit(__QLCNIC_RESETTING, &adapter->state)) return -EBUSY; + tx_rings = adapter->drv_tss_rings; + rx_rings = adapter->drv_rss_rings; + netif_device_detach(netdev); + + err = qlcnic_set_real_num_queues(adapter, tx_rings, rx_rings); + if (err) + goto done; + if (netif_running(netdev)) __qlcnic_down(adapter, netdev); @@ -4007,7 +4042,17 @@ int qlcnic_setup_rings(struct qlcnic_adapter *adapter) return err; } - netif_set_real_num_tx_queues(netdev, adapter->drv_tx_rings); + /* Check if we need to update real_num_{tx|rx}_queues because + * qlcnic_setup_intr() may change Tx/Rx rings size + */ + if ((tx_rings != adapter->drv_tx_rings) || + (rx_rings != adapter->drv_sds_rings)) { + err = qlcnic_set_real_num_queues(adapter, + adapter->drv_tx_rings, + adapter->drv_sds_rings); + if (err) + goto done; + } if (qlcnic_83xx_check(adapter)) { qlcnic_83xx_initialize_nic(adapter, 1); From aeefa1ecfc799b0ea2c4979617f14cecd5cccbfd Mon Sep 17 00:00:00 2001 From: Sergey Popovich Date: Tue, 6 May 2014 18:23:08 +0300 Subject: [PATCH 041/178] ipv4: fib_semantics: increment fib_info_cnt after fib_info allocation Increment fib_info_cnt in fib_create_info() right after successfuly alllocating fib_info structure, overwise fib_metrics allocation failure leads to fib_info_cnt incorrectly decremented in free_fib_info(), called on error path from fib_create_info(). Signed-off-by: Sergey Popovich Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 8a043f03c88e..b10cd43a4722 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -821,13 +821,13 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); if (fi == NULL) goto failure; + fib_info_cnt++; if (cfg->fc_mx) { fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); if (!fi->fib_metrics) goto failure; } else fi->fib_metrics = (u32 *) dst_default_metrics; - fib_info_cnt++; fi->fib_net = hold_net(net); fi->fib_protocol = cfg->fc_protocol; From 23a456f05353035d1a2b3f1b9a92707acdc036e0 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Tue, 6 May 2014 18:52:16 +0200 Subject: [PATCH 042/178] net: mdio: of_mdiobus_register(): fall back to mdiobus_register() for !CONFIG_OF If CONFIG_OF is not set, make of_mdiobus_register() call mdiobus_register() instead of returning -ENOSYS. This way, we can just call of_mdiobus_register() from all DT-enabled drivers to handle the compat cases. Signed-off-by: Daniel Mack Suggested-by: Florian Fainelli Acked-by: Florian Fainelli Acked-by: Mugunthan V N Signed-off-by: David S. Miller --- include/linux/of_mdio.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index 6fe8464ed767..881a7c3571f4 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -31,7 +31,12 @@ extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np); #else /* CONFIG_OF */ static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) { - return -ENOSYS; + /* + * Fall back to the non-DT function to register a bus. + * This way, we don't have to keep compat bits around in drivers. + */ + + return mdiobus_register(mdio); } static inline struct phy_device *of_phy_find_device(struct device_node *phy_np) From 0041898ec19f10e2adf5f1245f32df88d627ae8a Mon Sep 17 00:00:00 2001 From: "S. Lockwood-Childs" Date: Thu, 8 May 2014 13:34:01 -0400 Subject: [PATCH 043/178] tools/liblockdep: Build liblockdep from tools/Makefile add targets to build liblockdep with make -C tools liblockdep like the way other stuff under tools/ can be built Signed-off-by: S. Lockwood-Childs Signed-off-by: Sasha Levin --- tools/Makefile | 6 ++++++ tools/lib/lockdep/Makefile | 3 +-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/Makefile b/tools/Makefile index bcae806b0c39..9a617adc6675 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -44,6 +44,9 @@ cpupower: FORCE cgroup firewire hv guest usb virtio vm net: FORCE $(call descend,$@) +liblockdep: FORCE + $(call descend,lib/lockdep) + libapikfs: FORCE $(call descend,lib/api) @@ -91,6 +94,9 @@ cpupower_clean: cgroup_clean hv_clean firewire_clean lguest_clean usb_clean virtio_clean vm_clean net_clean: $(call descend,$(@:_clean=),clean) +liblockdep_clean: + $(call descend,lib/lockdep,clean) + libapikfs_clean: $(call descend,lib/api,clean) diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile index cb09d3ff8f58..c70919b5ebe7 100644 --- a/tools/lib/lockdep/Makefile +++ b/tools/lib/lockdep/Makefile @@ -1,8 +1,7 @@ # file format version FILE_VERSION = 1 -MAKEFLAGS += --no-print-directory -LIBLOCKDEP_VERSION=$(shell make -sC ../../.. kernelversion) +LIBLOCKDEP_VERSION=$(shell make --no-print-directory -sC ../../.. kernelversion) # Makefiles suck: This macro sets a default value of $(2) for the # variable named by $(1), unless the variable has been set by From ad3b564deb91c67ff9621255307ed31da870c8f7 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 8 May 2014 13:55:13 -0400 Subject: [PATCH 044/178] tools/liblockdep: Remove all build files when doing make clean We forgot to remove the shared library with the version number when 'make clean' ran, fix the clean pattern. Signed-off-by: Sasha Levin --- tools/lib/lockdep/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile index c70919b5ebe7..bba2f5253b6e 100644 --- a/tools/lib/lockdep/Makefile +++ b/tools/lib/lockdep/Makefile @@ -230,7 +230,7 @@ install_lib: all_cmd install: install_lib clean: - $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d + $(RM) *.o *~ $(TARGETS) *.a *liblockdep*.so* $(VERSION_FILES) .*.d $(RM) tags TAGS endif # skip-makefile From b18eb2d779240631a098626cb6841ee2dd34fda0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 7 May 2014 14:07:32 -0700 Subject: [PATCH 045/178] sparc64: Fix huge TSB mapping on pre-UltraSPARC-III cpus. Access to the TSB hash tables during TLB misses requires that there be an atomic 128-bit quad load available so that we fetch a matching TAG and DATA field at the same time. On cpus prior to UltraSPARC-III only virtual address based quad loads are available. UltraSPARC-III and later provide physical address based variants which are easier to use. When we only have virtual address based quad loads available this means that we have to lock the TSB into the TLB at a fixed virtual address on each cpu when it runs that process. We can't just access the PAGE_OFFSET based aliased mapping of these TSBs because we cannot take a recursive TLB miss inside of the TLB miss handler without risking running out of hardware trap levels (some trap combinations can be deep, such as those generated by register window spill and fill traps). Without huge pages it's working perfectly fine, but when the huge TSB got added another chunk of fixed virtual address space was not allocated for this second TSB mapping. So we were mapping both the 8K and 4MB TSBs to the same exact virtual address, causing multiple TLB matches which gives undefined behavior. Signed-off-by: David S. Miller --- arch/sparc/include/asm/pgtable_64.h | 6 ++++-- arch/sparc/mm/tsb.c | 14 +++++++++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index fde5abaac0cc..1a49ffdf9da9 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -24,7 +24,8 @@ /* The kernel image occupies 0x4000000 to 0x6000000 (4MB --> 96MB). * The page copy blockops can use 0x6000000 to 0x8000000. - * The TSB is mapped in the 0x8000000 to 0xa000000 range. + * The 8K TSB is mapped in the 0x8000000 to 0x8400000 range. + * The 4M TSB is mapped in the 0x8400000 to 0x8800000 range. * The PROM resides in an area spanning 0xf0000000 to 0x100000000. * The vmalloc area spans 0x100000000 to 0x200000000. * Since modules need to be in the lowest 32-bits of the address space, @@ -33,7 +34,8 @@ * 0x400000000. */ #define TLBTEMP_BASE _AC(0x0000000006000000,UL) -#define TSBMAP_BASE _AC(0x0000000008000000,UL) +#define TSBMAP_8K_BASE _AC(0x0000000008000000,UL) +#define TSBMAP_4M_BASE _AC(0x0000000008400000,UL) #define MODULES_VADDR _AC(0x0000000010000000,UL) #define MODULES_LEN _AC(0x00000000e0000000,UL) #define MODULES_END _AC(0x00000000f0000000,UL) diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index f5d506fdddad..fe19b81acc09 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -133,7 +133,19 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign mm->context.tsb_block[tsb_idx].tsb_nentries = tsb_bytes / sizeof(struct tsb); - base = TSBMAP_BASE; + switch (tsb_idx) { + case MM_TSB_BASE: + base = TSBMAP_8K_BASE; + break; +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) + case MM_TSB_HUGE: + base = TSBMAP_4M_BASE; + break; +#endif + default: + BUG(); + } + tte = pgprot_val(PAGE_KERNEL_LOCKED); tsb_paddr = __pa(mm->context.tsb_block[tsb_idx].tsb); BUG_ON(tsb_paddr & (tsb_bytes - 1UL)); From a8951d5814e1373807a94f79f7ccec7041325470 Mon Sep 17 00:00:00 2001 From: Sergey Popovich Date: Thu, 8 May 2014 16:22:35 +0300 Subject: [PATCH 046/178] netfilter: Fix potential use after free in ip6_route_me_harder() Dst is released one line before we access it again with dst->error. Fixes: 58e35d147128 netfilter: ipv6: propagate routing errors from ip6_route_me_harder() Signed-off-by: Sergey Popovich Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 95f3f1da0d7f..d38e6a8d8b9f 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -30,13 +30,15 @@ int ip6_route_me_harder(struct sk_buff *skb) .daddr = iph->daddr, .saddr = iph->saddr, }; + int err; dst = ip6_route_output(net, skb->sk, &fl6); - if (dst->error) { + err = dst->error; + if (err) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); dst_release(dst); - return dst->error; + return err; } /* Drop old route. */ From c9d8f1a64225dfcc2f721d73a5984a2444920744 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 6 May 2014 11:02:49 -0700 Subject: [PATCH 047/178] ipv4: move local_port_range out of CONFIG_SYSCTL When CONFIG_SYSCTL is not set, ip_local_port_range should still work, just that no one can change it. Therefore we should move it out of sysctl_inet.c. Also, rename it to ->ip_local_ports instead. Cc: David S. Miller Cc: Francois Romieu Reported-by: Stefan de Konink Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 2 +- net/ipv4/af_inet.c | 28 ++++++++++++++++++++++++++++ net/ipv4/inet_connection_sock.c | 8 ++++---- net/ipv4/ping.c | 4 ++-- net/ipv4/sysctl_net_ipv4.c | 29 +++++++++++------------------ 5 files changed, 46 insertions(+), 25 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 80f500a29498..3d95cd475316 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -66,7 +66,7 @@ struct netns_ipv4 { int sysctl_icmp_ratemask; int sysctl_icmp_errors_use_inbound_ifaddr; - struct local_ports sysctl_local_ports; + struct local_ports ip_local_ports; int sysctl_tcp_ecn; int sysctl_ip_no_pmtu_disc; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8c54870db792..cccc8e487c7e 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1650,6 +1650,31 @@ static int __init init_ipv4_mibs(void) return register_pernet_subsys(&ipv4_mib_ops); } +static __net_init int inet_init_net(struct net *net) +{ + /* + * Set defaults for local port range + */ + seqlock_init(&net->ipv4.ip_local_ports.lock); + net->ipv4.ip_local_ports.range[0] = 32768; + net->ipv4.ip_local_ports.range[1] = 61000; + return 0; +} + +static __net_exit void inet_exit_net(struct net *net) +{ +} + +static __net_initdata struct pernet_operations af_inet_ops = { + .init = inet_init_net, + .exit = inet_exit_net, +}; + +static int __init init_inet_pernet_ops(void) +{ + return register_pernet_subsys(&af_inet_ops); +} + static int ipv4_proc_init(void); /* @@ -1794,6 +1819,9 @@ static int __init inet_init(void) if (ip_mr_init()) pr_crit("%s: Cannot init ipv4 mroute\n", __func__); #endif + + if (init_inet_pernet_ops()) + pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__); /* * Initialise per-cpu ipv4 mibs */ diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 0d1e2cb877ec..a56b8e6e866a 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -37,11 +37,11 @@ void inet_get_local_port_range(struct net *net, int *low, int *high) unsigned int seq; do { - seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); + seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); - *low = net->ipv4.sysctl_local_ports.range[0]; - *high = net->ipv4.sysctl_local_ports.range[1]; - } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); + *low = net->ipv4.ip_local_ports.range[0]; + *high = net->ipv4.ip_local_ports.range[1]; + } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq)); } EXPORT_SYMBOL(inet_get_local_port_range); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 8210964a9f19..347bdde9a585 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -240,11 +240,11 @@ static void inet_get_ping_group_range_net(struct net *net, kgid_t *low, unsigned int seq; do { - seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); + seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); *low = data[0]; *high = data[1]; - } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); + } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq)); } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 44eba052b43d..a116c41b05dd 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -45,10 +45,10 @@ static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; /* Update system visible IP port range */ static void set_local_port_range(struct net *net, int range[2]) { - write_seqlock(&net->ipv4.sysctl_local_ports.lock); - net->ipv4.sysctl_local_ports.range[0] = range[0]; - net->ipv4.sysctl_local_ports.range[1] = range[1]; - write_sequnlock(&net->ipv4.sysctl_local_ports.lock); + write_seqlock(&net->ipv4.ip_local_ports.lock); + net->ipv4.ip_local_ports.range[0] = range[0]; + net->ipv4.ip_local_ports.range[1] = range[1]; + write_sequnlock(&net->ipv4.ip_local_ports.lock); } /* Validate changes from /proc interface. */ @@ -57,7 +57,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write, size_t *lenp, loff_t *ppos) { struct net *net = - container_of(table->data, struct net, ipv4.sysctl_local_ports.range); + container_of(table->data, struct net, ipv4.ip_local_ports.range); int ret; int range[2]; struct ctl_table tmp = { @@ -90,11 +90,11 @@ static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low container_of(table->data, struct net, ipv4.sysctl_ping_group_range); unsigned int seq; do { - seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); + seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); *low = data[0]; *high = data[1]; - } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); + } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq)); } /* Update system visible IP port range */ @@ -103,10 +103,10 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig kgid_t *data = table->data; struct net *net = container_of(table->data, struct net, ipv4.sysctl_ping_group_range); - write_seqlock(&net->ipv4.sysctl_local_ports.lock); + write_seqlock(&net->ipv4.ip_local_ports.lock); data[0] = low; data[1] = high; - write_sequnlock(&net->ipv4.sysctl_local_ports.lock); + write_sequnlock(&net->ipv4.ip_local_ports.lock); } /* Validate changes from /proc interface. */ @@ -819,8 +819,8 @@ static struct ctl_table ipv4_net_table[] = { }, { .procname = "ip_local_port_range", - .maxlen = sizeof(init_net.ipv4.sysctl_local_ports.range), - .data = &init_net.ipv4.sysctl_local_ports.range, + .maxlen = sizeof(init_net.ipv4.ip_local_ports.range), + .data = &init_net.ipv4.ip_local_ports.range, .mode = 0644, .proc_handler = ipv4_local_port_range, }, @@ -865,13 +865,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) net->ipv4.sysctl_ping_group_range[0] = make_kgid(&init_user_ns, 1); net->ipv4.sysctl_ping_group_range[1] = make_kgid(&init_user_ns, 0); - /* - * Set defaults for local port range - */ - seqlock_init(&net->ipv4.sysctl_local_ports.lock); - net->ipv4.sysctl_local_ports.range[0] = 32768; - net->ipv4.sysctl_local_ports.range[1] = 61000; - net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); if (net->ipv4.ipv4_hdr == NULL) goto err_reg; From ba6b918ab234186d3aa1663e296586a1b526b77a Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 6 May 2014 11:02:50 -0700 Subject: [PATCH 048/178] ping: move ping_group_range out of CONFIG_SYSCTL Similarly, when CONFIG_SYSCTL is not set, ping_group_range should still work, just that no one can change it. Therefore we should move it out of sysctl_net_ipv4.c. And, it should not share the same seqlock with ip_local_port_range. BTW, rename it to ->ping_group_range instead. Cc: David S. Miller Cc: Francois Romieu Reported-by: Stefan de Konink Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 7 ++++++- net/ipv4/af_inet.c | 8 ++++++++ net/ipv4/ping.c | 6 +++--- net/ipv4/sysctl_net_ipv4.c | 13 +++---------- 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 3d95cd475316..b2704fd0ec80 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -20,6 +20,11 @@ struct local_ports { int range[2]; }; +struct ping_group_range { + seqlock_t lock; + kgid_t range[2]; +}; + struct netns_ipv4 { #ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; @@ -72,7 +77,7 @@ struct netns_ipv4 { int sysctl_ip_no_pmtu_disc; int sysctl_ip_fwd_use_pmtu; - kgid_t sysctl_ping_group_range[2]; + struct ping_group_range ping_group_range; atomic_t dev_addr_genid; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index cccc8e487c7e..6d6dd345bc4d 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1658,6 +1658,14 @@ static __net_init int inet_init_net(struct net *net) seqlock_init(&net->ipv4.ip_local_ports.lock); net->ipv4.ip_local_ports.range[0] = 32768; net->ipv4.ip_local_ports.range[1] = 61000; + + seqlock_init(&net->ipv4.ping_group_range.lock); + /* + * Sane defaults - nobody may create ping sockets. + * Boot scripts should set this to distro-specific group. + */ + net->ipv4.ping_group_range.range[0] = make_kgid(&init_user_ns, 1); + net->ipv4.ping_group_range.range[1] = make_kgid(&init_user_ns, 0); return 0; } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 347bdde9a585..044a0ddf6a79 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -236,15 +236,15 @@ exit: static void inet_get_ping_group_range_net(struct net *net, kgid_t *low, kgid_t *high) { - kgid_t *data = net->ipv4.sysctl_ping_group_range; + kgid_t *data = net->ipv4.ping_group_range.range; unsigned int seq; do { - seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); + seq = read_seqbegin(&net->ipv4.ping_group_range.lock); *low = data[0]; *high = data[1]; - } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq)); + } while (read_seqretry(&net->ipv4.ping_group_range.lock, seq)); } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a116c41b05dd..5cde8f263d40 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -87,7 +87,7 @@ static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low { kgid_t *data = table->data; struct net *net = - container_of(table->data, struct net, ipv4.sysctl_ping_group_range); + container_of(table->data, struct net, ipv4.ping_group_range.range); unsigned int seq; do { seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); @@ -102,7 +102,7 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig { kgid_t *data = table->data; struct net *net = - container_of(table->data, struct net, ipv4.sysctl_ping_group_range); + container_of(table->data, struct net, ipv4.ping_group_range.range); write_seqlock(&net->ipv4.ip_local_ports.lock); data[0] = low; data[1] = high; @@ -805,7 +805,7 @@ static struct ctl_table ipv4_net_table[] = { }, { .procname = "ping_group_range", - .data = &init_net.ipv4.sysctl_ping_group_range, + .data = &init_net.ipv4.ping_group_range.range, .maxlen = sizeof(gid_t)*2, .mode = 0644, .proc_handler = ipv4_ping_group_range, @@ -858,13 +858,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) table[i].data += (void *)net - (void *)&init_net; } - /* - * Sane defaults - nobody may create ping sockets. - * Boot scripts should set this to distro-specific group. - */ - net->ipv4.sysctl_ping_group_range[0] = make_kgid(&init_user_ns, 1); - net->ipv4.sysctl_ping_group_range[1] = make_kgid(&init_user_ns, 0); - net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); if (net->ipv4.ipv4_hdr == NULL) goto err_reg; From 6af55ff52b02d492d45db88df3e461fa51a6f753 Mon Sep 17 00:00:00 2001 From: Darek Marcinkiewicz Date: Tue, 6 May 2014 22:24:50 +0200 Subject: [PATCH 049/178] Driver for Beckhoff CX5020 EtherCAT master module. This driver adds support for EtherCAT master module located on CCAT FPGA found on Beckhoff CX series industrial PCs. The driver exposes EtherCAT master as an ethernet interface. EtherCAT is a fieldbus protocol defined on top of ethernet and Beckhoff CX5020 PCs come with built-in EtherCAT master module located on a FPGA, which in turn is connected to a PCI bus. Signed-off-by: Dariusz Marcinkiewicz Signed-off-by: David S. Miller --- drivers/net/ethernet/Kconfig | 12 + drivers/net/ethernet/Makefile | 1 + drivers/net/ethernet/ec_bhf.c | 706 ++++++++++++++++++++++++++++++++++ 3 files changed, 719 insertions(+) create mode 100644 drivers/net/ethernet/ec_bhf.c diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index 39b26fe28d10..d7401017a3f1 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -35,6 +35,18 @@ source "drivers/net/ethernet/calxeda/Kconfig" source "drivers/net/ethernet/chelsio/Kconfig" source "drivers/net/ethernet/cirrus/Kconfig" source "drivers/net/ethernet/cisco/Kconfig" + +config CX_ECAT + tristate "Beckhoff CX5020 EtherCAT master support" + depends on PCI + ---help--- + Driver for EtherCAT master module located on CCAT FPGA + that can be found on Beckhoff CX5020, and possibly other of CX + Beckhoff CX series industrial PCs. + + To compile this driver as a module, choose M here. The module + will be called ec_bhf. + source "drivers/net/ethernet/davicom/Kconfig" config DNET diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index 545d0b3b9cb4..35190e36c456 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_NET_CALXEDA_XGMAC) += calxeda/ obj-$(CONFIG_NET_VENDOR_CHELSIO) += chelsio/ obj-$(CONFIG_NET_VENDOR_CIRRUS) += cirrus/ obj-$(CONFIG_NET_VENDOR_CISCO) += cisco/ +obj-$(CONFIG_CX_ECAT) += ec_bhf.o obj-$(CONFIG_DM9000) += davicom/ obj-$(CONFIG_DNET) += dnet.o obj-$(CONFIG_NET_VENDOR_DEC) += dec/ diff --git a/drivers/net/ethernet/ec_bhf.c b/drivers/net/ethernet/ec_bhf.c new file mode 100644 index 000000000000..4884205e56ee --- /dev/null +++ b/drivers/net/ethernet/ec_bhf.c @@ -0,0 +1,706 @@ + /* + * drivers/net/ethernet/beckhoff/ec_bhf.c + * + * Copyright (C) 2014 Darek Marcinkiewicz + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +/* This is a driver for EtherCAT master module present on CCAT FPGA. + * Those can be found on Bechhoff CX50xx industrial PCs. + */ + +#if 0 +#define DEBUG +#endif +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#define TIMER_INTERVAL_NSEC 20000 + +#define INFO_BLOCK_SIZE 0x10 +#define INFO_BLOCK_TYPE 0x0 +#define INFO_BLOCK_REV 0x2 +#define INFO_BLOCK_BLK_CNT 0x4 +#define INFO_BLOCK_TX_CHAN 0x4 +#define INFO_BLOCK_RX_CHAN 0x5 +#define INFO_BLOCK_OFFSET 0x8 + +#define EC_MII_OFFSET 0x4 +#define EC_FIFO_OFFSET 0x8 +#define EC_MAC_OFFSET 0xc + +#define MAC_FRAME_ERR_CNT 0x0 +#define MAC_RX_ERR_CNT 0x1 +#define MAC_CRC_ERR_CNT 0x2 +#define MAC_LNK_LST_ERR_CNT 0x3 +#define MAC_TX_FRAME_CNT 0x10 +#define MAC_RX_FRAME_CNT 0x14 +#define MAC_TX_FIFO_LVL 0x20 +#define MAC_DROPPED_FRMS 0x28 +#define MAC_CONNECTED_CCAT_FLAG 0x78 + +#define MII_MAC_ADDR 0x8 +#define MII_MAC_FILT_FLAG 0xe +#define MII_LINK_STATUS 0xf + +#define FIFO_TX_REG 0x0 +#define FIFO_TX_RESET 0x8 +#define FIFO_RX_REG 0x10 +#define FIFO_RX_ADDR_VALID (1u << 31) +#define FIFO_RX_RESET 0x18 + +#define DMA_CHAN_OFFSET 0x1000 +#define DMA_CHAN_SIZE 0x8 + +#define DMA_WINDOW_SIZE_MASK 0xfffffffc + +static struct pci_device_id ids[] = { + { PCI_DEVICE(0x15ec, 0x5000), }, + { 0, } +}; +MODULE_DEVICE_TABLE(pci, ids); + +struct rx_header { +#define RXHDR_NEXT_ADDR_MASK 0xffffffu +#define RXHDR_NEXT_VALID (1u << 31) + __le32 next; +#define RXHDR_NEXT_RECV_FLAG 0x1 + __le32 recv; +#define RXHDR_LEN_MASK 0xfffu + __le16 len; + __le16 port; + __le32 reserved; + u8 timestamp[8]; +} __packed; + +#define PKT_PAYLOAD_SIZE 0x7e8 +struct rx_desc { + struct rx_header header; + u8 data[PKT_PAYLOAD_SIZE]; +} __packed; + +struct tx_header { + __le16 len; +#define TX_HDR_PORT_0 0x1 +#define TX_HDR_PORT_1 0x2 + u8 port; + u8 ts_enable; +#define TX_HDR_SENT 0x1 + __le32 sent; + u8 timestamp[8]; +} __packed; + +struct tx_desc { + struct tx_header header; + u8 data[PKT_PAYLOAD_SIZE]; +} __packed; + +#define FIFO_SIZE 64 + +static long polling_frequency = TIMER_INTERVAL_NSEC; + +struct bhf_dma { + u8 *buf; + size_t len; + dma_addr_t buf_phys; + + u8 *alloc; + size_t alloc_len; + dma_addr_t alloc_phys; +}; + +struct ec_bhf_priv { + struct net_device *net_dev; + + struct pci_dev *dev; + + void * __iomem io; + void * __iomem dma_io; + + struct hrtimer hrtimer; + + int tx_dma_chan; + int rx_dma_chan; + void * __iomem ec_io; + void * __iomem fifo_io; + void * __iomem mii_io; + void * __iomem mac_io; + + struct bhf_dma rx_buf; + struct rx_desc *rx_descs; + int rx_dnext; + int rx_dcount; + + struct bhf_dma tx_buf; + struct tx_desc *tx_descs; + int tx_dcount; + int tx_dnext; + + u64 stat_rx_bytes; + u64 stat_tx_bytes; +}; + +#define PRIV_TO_DEV(priv) (&(priv)->dev->dev) + +#define ETHERCAT_MASTER_ID 0x14 + +static void ec_bhf_print_status(struct ec_bhf_priv *priv) +{ + struct device *dev = PRIV_TO_DEV(priv); + + dev_dbg(dev, "Frame error counter: %d\n", + ioread8(priv->mac_io + MAC_FRAME_ERR_CNT)); + dev_dbg(dev, "RX error counter: %d\n", + ioread8(priv->mac_io + MAC_RX_ERR_CNT)); + dev_dbg(dev, "CRC error counter: %d\n", + ioread8(priv->mac_io + MAC_CRC_ERR_CNT)); + dev_dbg(dev, "TX frame counter: %d\n", + ioread32(priv->mac_io + MAC_TX_FRAME_CNT)); + dev_dbg(dev, "RX frame counter: %d\n", + ioread32(priv->mac_io + MAC_RX_FRAME_CNT)); + dev_dbg(dev, "TX fifo level: %d\n", + ioread8(priv->mac_io + MAC_TX_FIFO_LVL)); + dev_dbg(dev, "Dropped frames: %d\n", + ioread8(priv->mac_io + MAC_DROPPED_FRMS)); + dev_dbg(dev, "Connected with CCAT slot: %d\n", + ioread8(priv->mac_io + MAC_CONNECTED_CCAT_FLAG)); + dev_dbg(dev, "Link status: %d\n", + ioread8(priv->mii_io + MII_LINK_STATUS)); +} + +static void ec_bhf_reset(struct ec_bhf_priv *priv) +{ + iowrite8(0, priv->mac_io + MAC_FRAME_ERR_CNT); + iowrite8(0, priv->mac_io + MAC_RX_ERR_CNT); + iowrite8(0, priv->mac_io + MAC_CRC_ERR_CNT); + iowrite8(0, priv->mac_io + MAC_LNK_LST_ERR_CNT); + iowrite32(0, priv->mac_io + MAC_TX_FRAME_CNT); + iowrite32(0, priv->mac_io + MAC_RX_FRAME_CNT); + iowrite8(0, priv->mac_io + MAC_DROPPED_FRMS); + + iowrite8(0, priv->fifo_io + FIFO_TX_RESET); + iowrite8(0, priv->fifo_io + FIFO_RX_RESET); + + iowrite8(0, priv->mac_io + MAC_TX_FIFO_LVL); +} + +static void ec_bhf_send_packet(struct ec_bhf_priv *priv, struct tx_desc *desc) +{ + u32 len = le16_to_cpu(desc->header.len) + sizeof(desc->header); + u32 addr = (u8 *)desc - priv->tx_buf.buf; + + iowrite32((ALIGN(len, 8) << 24) | addr, priv->fifo_io + FIFO_TX_REG); + + dev_dbg(PRIV_TO_DEV(priv), "Done sending packet\n"); +} + +static int ec_bhf_desc_sent(struct tx_desc *desc) +{ + return le32_to_cpu(desc->header.sent) & TX_HDR_SENT; +} + +static void ec_bhf_process_tx(struct ec_bhf_priv *priv) +{ + if (unlikely(netif_queue_stopped(priv->net_dev))) { + /* Make sure that we perceive changes to tx_dnext. */ + smp_rmb(); + + if (ec_bhf_desc_sent(&priv->tx_descs[priv->tx_dnext])) + netif_wake_queue(priv->net_dev); + } +} + +static int ec_bhf_pkt_received(struct rx_desc *desc) +{ + return le32_to_cpu(desc->header.recv) & RXHDR_NEXT_RECV_FLAG; +} + +static void ec_bhf_add_rx_desc(struct ec_bhf_priv *priv, struct rx_desc *desc) +{ + iowrite32(FIFO_RX_ADDR_VALID | ((u8 *)(desc) - priv->rx_buf.buf), + priv->fifo_io + FIFO_RX_REG); +} + +static void ec_bhf_process_rx(struct ec_bhf_priv *priv) +{ + struct rx_desc *desc = &priv->rx_descs[priv->rx_dnext]; + struct device *dev = PRIV_TO_DEV(priv); + + while (ec_bhf_pkt_received(desc)) { + int pkt_size = (le16_to_cpu(desc->header.len) & + RXHDR_LEN_MASK) - sizeof(struct rx_header) - 4; + u8 *data = desc->data; + struct sk_buff *skb; + + skb = netdev_alloc_skb_ip_align(priv->net_dev, pkt_size); + dev_dbg(dev, "Received packet, size: %d\n", pkt_size); + + if (skb) { + memcpy(skb_put(skb, pkt_size), data, pkt_size); + skb->protocol = eth_type_trans(skb, priv->net_dev); + dev_dbg(dev, "Protocol type: %x\n", skb->protocol); + + priv->stat_rx_bytes += pkt_size; + + netif_rx(skb); + } else { + dev_err_ratelimited(dev, + "Couldn't allocate a skb_buff for a packet of size %u\n", + pkt_size); + } + + desc->header.recv = 0; + + ec_bhf_add_rx_desc(priv, desc); + + priv->rx_dnext = (priv->rx_dnext + 1) % priv->rx_dcount; + desc = &priv->rx_descs[priv->rx_dnext]; + } + +} + +static enum hrtimer_restart ec_bhf_timer_fun(struct hrtimer *timer) +{ + struct ec_bhf_priv *priv = container_of(timer, struct ec_bhf_priv, + hrtimer); + ec_bhf_process_rx(priv); + ec_bhf_process_tx(priv); + + if (!netif_running(priv->net_dev)) + return HRTIMER_NORESTART; + + hrtimer_forward_now(timer, ktime_set(0, polling_frequency)); + return HRTIMER_RESTART; +} + +static int ec_bhf_setup_offsets(struct ec_bhf_priv *priv) +{ + struct device *dev = PRIV_TO_DEV(priv); + unsigned block_count, i; + void * __iomem ec_info; + + dev_dbg(dev, "Info block:\n"); + dev_dbg(dev, "Type of function: %x\n", (unsigned)ioread16(priv->io)); + dev_dbg(dev, "Revision of function: %x\n", + (unsigned)ioread16(priv->io + INFO_BLOCK_REV)); + + block_count = ioread8(priv->io + INFO_BLOCK_BLK_CNT); + dev_dbg(dev, "Number of function blocks: %x\n", block_count); + + for (i = 0; i < block_count; i++) { + u16 type = ioread16(priv->io + i * INFO_BLOCK_SIZE + + INFO_BLOCK_TYPE); + if (type == ETHERCAT_MASTER_ID) + break; + } + if (i == block_count) { + dev_err(dev, "EtherCAT master with DMA block not found\n"); + return -ENODEV; + } + dev_dbg(dev, "EtherCAT master with DMA block found at pos: %d\n", i); + + ec_info = priv->io + i * INFO_BLOCK_SIZE; + dev_dbg(dev, "EtherCAT master revision: %d\n", + ioread16(ec_info + INFO_BLOCK_REV)); + + priv->tx_dma_chan = ioread8(ec_info + INFO_BLOCK_TX_CHAN); + dev_dbg(dev, "EtherCAT master tx dma channel: %d\n", + priv->tx_dma_chan); + + priv->rx_dma_chan = ioread8(ec_info + INFO_BLOCK_RX_CHAN); + dev_dbg(dev, "EtherCAT master rx dma channel: %d\n", + priv->rx_dma_chan); + + priv->ec_io = priv->io + ioread32(ec_info + INFO_BLOCK_OFFSET); + priv->mii_io = priv->ec_io + ioread32(priv->ec_io + EC_MII_OFFSET); + priv->fifo_io = priv->ec_io + ioread32(priv->ec_io + EC_FIFO_OFFSET); + priv->mac_io = priv->ec_io + ioread32(priv->ec_io + EC_MAC_OFFSET); + + dev_dbg(dev, + "EtherCAT block addres: %p, fifo address: %p, mii address: %p, mac address: %p\n", + priv->ec_io, priv->fifo_io, priv->mii_io, priv->mac_io); + + return 0; +} + +static netdev_tx_t ec_bhf_start_xmit(struct sk_buff *skb, + struct net_device *net_dev) +{ + struct ec_bhf_priv *priv = netdev_priv(net_dev); + struct tx_desc *desc; + unsigned len; + + dev_dbg(PRIV_TO_DEV(priv), "Starting xmit\n"); + + desc = &priv->tx_descs[priv->tx_dnext]; + + skb_copy_and_csum_dev(skb, desc->data); + len = skb->len; + + memset(&desc->header, 0, sizeof(desc->header)); + desc->header.len = cpu_to_le16(len); + desc->header.port = TX_HDR_PORT_0; + + ec_bhf_send_packet(priv, desc); + + priv->tx_dnext = (priv->tx_dnext + 1) % priv->tx_dcount; + + if (!ec_bhf_desc_sent(&priv->tx_descs[priv->tx_dnext])) { + /* Make sure that update updates to tx_dnext are perceived + * by timer routine. + */ + smp_wmb(); + + netif_stop_queue(net_dev); + + dev_dbg(PRIV_TO_DEV(priv), "Stopping netif queue\n"); + ec_bhf_print_status(priv); + } + + priv->stat_tx_bytes += len; + + dev_kfree_skb(skb); + + return NETDEV_TX_OK; +} + +static int ec_bhf_alloc_dma_mem(struct ec_bhf_priv *priv, + struct bhf_dma *buf, + int channel, + int size) +{ + int offset = channel * DMA_CHAN_SIZE + DMA_CHAN_OFFSET; + struct device *dev = PRIV_TO_DEV(priv); + u32 mask; + + iowrite32(0xffffffff, priv->dma_io + offset); + + mask = ioread32(priv->dma_io + offset); + mask &= DMA_WINDOW_SIZE_MASK; + dev_dbg(dev, "Read mask %x for channel %d\n", mask, channel); + + /* We want to allocate a chunk of memory that is: + * - aligned to the mask we just read + * - is of size 2^mask bytes (at most) + * In order to ensure that we will allocate buffer of + * 2 * 2^mask bytes. + */ + buf->len = min_t(int, ~mask + 1, size); + buf->alloc_len = 2 * buf->len; + + dev_dbg(dev, "Allocating %d bytes for channel %d", + (int)buf->alloc_len, channel); + buf->alloc = dma_alloc_coherent(dev, buf->alloc_len, &buf->alloc_phys, + GFP_KERNEL); + if (buf->alloc == NULL) { + dev_info(dev, "Failed to allocate buffer\n"); + return -ENOMEM; + } + + buf->buf_phys = (buf->alloc_phys + buf->len) & mask; + buf->buf = buf->alloc + (buf->buf_phys - buf->alloc_phys); + + iowrite32(0, priv->dma_io + offset + 4); + iowrite32(buf->buf_phys, priv->dma_io + offset); + dev_dbg(dev, "Buffer: %x and read from dev: %x", + (unsigned)buf->buf_phys, ioread32(priv->dma_io + offset)); + + return 0; +} + +static void ec_bhf_setup_tx_descs(struct ec_bhf_priv *priv) +{ + int i = 0; + + priv->tx_dcount = priv->tx_buf.len / sizeof(struct tx_desc); + priv->tx_descs = (struct tx_desc *) priv->tx_buf.buf; + priv->tx_dnext = 0; + + for (i = 0; i < priv->tx_dcount; i++) + priv->tx_descs[i].header.sent = cpu_to_le32(TX_HDR_SENT); +} + +static void ec_bhf_setup_rx_descs(struct ec_bhf_priv *priv) +{ + int i; + + priv->rx_dcount = priv->rx_buf.len / sizeof(struct rx_desc); + priv->rx_descs = (struct rx_desc *) priv->rx_buf.buf; + priv->rx_dnext = 0; + + for (i = 0; i < priv->rx_dcount; i++) { + struct rx_desc *desc = &priv->rx_descs[i]; + u32 next; + + if (i != priv->rx_dcount - 1) + next = (u8 *)(desc + 1) - priv->rx_buf.buf; + else + next = 0; + next |= RXHDR_NEXT_VALID; + desc->header.next = cpu_to_le32(next); + desc->header.recv = 0; + ec_bhf_add_rx_desc(priv, desc); + } +} + +static int ec_bhf_open(struct net_device *net_dev) +{ + struct ec_bhf_priv *priv = netdev_priv(net_dev); + struct device *dev = PRIV_TO_DEV(priv); + int err = 0; + + dev_info(dev, "Opening device\n"); + + ec_bhf_reset(priv); + + err = ec_bhf_alloc_dma_mem(priv, &priv->rx_buf, priv->rx_dma_chan, + FIFO_SIZE * sizeof(struct rx_desc)); + if (err) { + dev_err(dev, "Failed to allocate rx buffer\n"); + goto out; + } + ec_bhf_setup_rx_descs(priv); + + dev_info(dev, "RX buffer allocated, address: %x\n", + (unsigned)priv->rx_buf.buf_phys); + + err = ec_bhf_alloc_dma_mem(priv, &priv->tx_buf, priv->tx_dma_chan, + FIFO_SIZE * sizeof(struct tx_desc)); + if (err) { + dev_err(dev, "Failed to allocate tx buffer\n"); + goto error_rx_free; + } + dev_dbg(dev, "TX buffer allocated, addres: %x\n", + (unsigned)priv->tx_buf.buf_phys); + + iowrite8(0, priv->mii_io + MII_MAC_FILT_FLAG); + + ec_bhf_setup_tx_descs(priv); + + netif_start_queue(net_dev); + + hrtimer_init(&priv->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + priv->hrtimer.function = ec_bhf_timer_fun; + hrtimer_start(&priv->hrtimer, ktime_set(0, polling_frequency), + HRTIMER_MODE_REL); + + dev_info(PRIV_TO_DEV(priv), "Device open\n"); + + ec_bhf_print_status(priv); + + return 0; + +error_rx_free: + dma_free_coherent(dev, priv->rx_buf.alloc_len, priv->rx_buf.alloc, + priv->rx_buf.alloc_len); +out: + return err; +} + +static int ec_bhf_stop(struct net_device *net_dev) +{ + struct ec_bhf_priv *priv = netdev_priv(net_dev); + struct device *dev = PRIV_TO_DEV(priv); + + hrtimer_cancel(&priv->hrtimer); + + ec_bhf_reset(priv); + + netif_tx_disable(net_dev); + + dma_free_coherent(dev, priv->tx_buf.alloc_len, + priv->tx_buf.alloc, priv->tx_buf.alloc_phys); + dma_free_coherent(dev, priv->rx_buf.alloc_len, + priv->rx_buf.alloc, priv->rx_buf.alloc_phys); + + return 0; +} + +static struct rtnl_link_stats64 * +ec_bhf_get_stats(struct net_device *net_dev, + struct rtnl_link_stats64 *stats) +{ + struct ec_bhf_priv *priv = netdev_priv(net_dev); + + stats->rx_errors = ioread8(priv->mac_io + MAC_RX_ERR_CNT) + + ioread8(priv->mac_io + MAC_CRC_ERR_CNT) + + ioread8(priv->mac_io + MAC_FRAME_ERR_CNT); + stats->rx_packets = ioread32(priv->mac_io + MAC_RX_FRAME_CNT); + stats->tx_packets = ioread32(priv->mac_io + MAC_TX_FRAME_CNT); + stats->rx_dropped = ioread8(priv->mac_io + MAC_DROPPED_FRMS); + + stats->tx_bytes = priv->stat_tx_bytes; + stats->rx_bytes = priv->stat_rx_bytes; + + return stats; +} + +static const struct net_device_ops ec_bhf_netdev_ops = { + .ndo_start_xmit = ec_bhf_start_xmit, + .ndo_open = ec_bhf_open, + .ndo_stop = ec_bhf_stop, + .ndo_get_stats64 = ec_bhf_get_stats, + .ndo_change_mtu = eth_change_mtu, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = eth_mac_addr +}; + +static int ec_bhf_probe(struct pci_dev *dev, const struct pci_device_id *id) +{ + struct net_device *net_dev; + struct ec_bhf_priv *priv; + void * __iomem dma_io; + void * __iomem io; + int err = 0; + + err = pci_enable_device(dev); + if (err) + return err; + + pci_set_master(dev); + + err = pci_set_dma_mask(dev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&dev->dev, + "Required dma mask not supported, failed to initialize device\n"); + err = -EIO; + goto err_disable_dev; + } + + err = pci_set_consistent_dma_mask(dev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&dev->dev, + "Required dma mask not supported, failed to initialize device\n"); + goto err_disable_dev; + } + + err = pci_request_regions(dev, "ec_bhf"); + if (err) { + dev_err(&dev->dev, "Failed to request pci memory regions\n"); + goto err_disable_dev; + } + + io = pci_iomap(dev, 0, 0); + if (!io) { + dev_err(&dev->dev, "Failed to map pci card memory bar 0"); + err = -EIO; + goto err_release_regions; + } + + dma_io = pci_iomap(dev, 2, 0); + if (!dma_io) { + dev_err(&dev->dev, "Failed to map pci card memory bar 2"); + err = -EIO; + goto err_unmap; + } + + net_dev = alloc_etherdev(sizeof(struct ec_bhf_priv)); + if (net_dev == 0) { + err = -ENOMEM; + goto err_unmap_dma_io; + } + + pci_set_drvdata(dev, net_dev); + SET_NETDEV_DEV(net_dev, &dev->dev); + + net_dev->features = 0; + net_dev->flags |= IFF_NOARP; + + net_dev->netdev_ops = &ec_bhf_netdev_ops; + + priv = netdev_priv(net_dev); + priv->net_dev = net_dev; + priv->io = io; + priv->dma_io = dma_io; + priv->dev = dev; + + err = ec_bhf_setup_offsets(priv); + if (err < 0) + goto err_free_net_dev; + + memcpy_fromio(net_dev->dev_addr, priv->mii_io + MII_MAC_ADDR, 6); + + dev_dbg(&dev->dev, "CX5020 Ethercat master address: %pM\n", + net_dev->dev_addr); + + err = register_netdev(net_dev); + if (err < 0) + goto err_free_net_dev; + + return 0; + +err_free_net_dev: + free_netdev(net_dev); +err_unmap_dma_io: + pci_iounmap(dev, dma_io); +err_unmap: + pci_iounmap(dev, io); +err_release_regions: + pci_release_regions(dev); +err_disable_dev: + pci_clear_master(dev); + pci_disable_device(dev); + + return err; +} + +static void ec_bhf_remove(struct pci_dev *dev) +{ + struct net_device *net_dev = pci_get_drvdata(dev); + struct ec_bhf_priv *priv = netdev_priv(net_dev); + + unregister_netdev(net_dev); + free_netdev(net_dev); + + pci_iounmap(dev, priv->dma_io); + pci_iounmap(dev, priv->io); + pci_release_regions(dev); + pci_clear_master(dev); + pci_disable_device(dev); +} + +static struct pci_driver pci_driver = { + .name = "ec_bhf", + .id_table = ids, + .probe = ec_bhf_probe, + .remove = ec_bhf_remove, +}; + +static int __init ec_bhf_init(void) +{ + return pci_register_driver(&pci_driver); +} + +static void __exit ec_bhf_exit(void) +{ + pci_unregister_driver(&pci_driver); +} + +module_init(ec_bhf_init); +module_exit(ec_bhf_exit); + +module_param(polling_frequency, long, S_IRUGO); +MODULE_PARM_DESC(polling_frequency, "Polling timer frequency in ns"); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Dariusz Marcinkiewicz "); From 4a817aa78f573c6964f16d9aea3d0d10a226ade4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 9 May 2014 09:56:53 +0200 Subject: [PATCH 050/178] mac80211: allow VHT with peers not capable of 40MHz There are two (related) issues with this. One case, reported by Michal, is related to hostap: it unsets the 20/40 capability bit for stations that associate when it's in 20 MHz mode. The other case, reported by Eyal, is that some APs like Netgear R6300v2 and probably others based on the BCM4360 chipset can be configured for doing VHT at 20Mhz. In this case the beacon has a VHT IE but the HT cap indicates transmitter only support 20Mhz. In both of these cases, we currently avoid VHT and use only HT this means we can't use the highest rates (MCS8), so fixing this leads to throughput improvements. Reported-by: Michal Kazior Reported-by: Eyal Shapira Signed-off-by: Johannes Berg --- net/mac80211/vht.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index e9e36a256165..9265adfdabfc 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -129,9 +129,12 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, if (!vht_cap_ie || !sband->vht_cap.vht_supported) return; - /* A VHT STA must support 40 MHz */ - if (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40)) - return; + /* + * A VHT STA must support 40 MHz, but if we verify that here + * then we break a few things - some APs (e.g. Netgear R6300v2 + * and others based on the BCM4360 chipset) will unset this + * capability bit when operating in 20 MHz. + */ vht_cap->vht_supported = true; From f9ac71bfcc5c937ff02765dc316cf5bc01d21d97 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Wed, 26 Feb 2014 14:46:35 +0200 Subject: [PATCH 051/178] mac80211: fix vif name tracing If sdata doesn't have a valid dev (e.g. in case of monitor vif), the vif_name field was initialized with (a length of) some short string, but later was set to a different, potentially larger one. This resulted in out-of-bounds write, which usually appeared as garbage in the trace log. Simply trace sdata->name, as it should always have the correct name for both cases. Signed-off-by: Eliad Peller Signed-off-by: Johannes Berg --- net/mac80211/trace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index a0b0aea76525..cec5b60487a4 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -21,10 +21,10 @@ #define VIF_ENTRY __field(enum nl80211_iftype, vif_type) __field(void *, sdata) \ __field(bool, p2p) \ - __string(vif_name, sdata->dev ? sdata->dev->name : "") + __string(vif_name, sdata->name) #define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \ __entry->p2p = sdata->vif.p2p; \ - __assign_str(vif_name, sdata->dev ? sdata->dev->name : sdata->name) + __assign_str(vif_name, sdata->name) #define VIF_PR_FMT " vif:%s(%d%s)" #define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : "" From 6b5eeb7f874b689403e52a646e485d0191ab9507 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Fri, 9 May 2014 14:45:00 +0200 Subject: [PATCH 052/178] net: cdc_mbim: handle unaccelerated VLAN tagged frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver maps 802.1q VLANs to MBIM sessions. The mapping is based on a bogus assumption that all tagged frames will use the acceleration API because we enable NETIF_F_HW_VLAN_CTAG_TX. This fails for e.g. frames tagged in userspace using packet sockets. Such frames will erroneously be considered as untagged and silently dropped based on not being IP. Fix by falling back to looking into the ethernet header for a tag if no accelerated tag was found. Fixes: a82c7ce5bc5b ("net: cdc_ncm: map MBIM IPS SessionID to VLAN ID") Cc: Greg Suarez Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_mbim.c | 39 +++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index 13f7705fd679..2e025ddcef21 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -120,6 +120,16 @@ static void cdc_mbim_unbind(struct usbnet *dev, struct usb_interface *intf) cdc_ncm_unbind(dev, intf); } +/* verify that the ethernet protocol is IPv4 or IPv6 */ +static bool is_ip_proto(__be16 proto) +{ + switch (proto) { + case htons(ETH_P_IP): + case htons(ETH_P_IPV6): + return true; + } + return false; +} static struct sk_buff *cdc_mbim_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) { @@ -128,6 +138,7 @@ static struct sk_buff *cdc_mbim_tx_fixup(struct usbnet *dev, struct sk_buff *skb struct cdc_ncm_ctx *ctx = info->ctx; __le32 sign = cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN); u16 tci = 0; + bool is_ip; u8 *c; if (!ctx) @@ -137,25 +148,32 @@ static struct sk_buff *cdc_mbim_tx_fixup(struct usbnet *dev, struct sk_buff *skb if (skb->len <= ETH_HLEN) goto error; + /* Some applications using e.g. packet sockets will + * bypass the VLAN acceleration and create tagged + * ethernet frames directly. We primarily look for + * the accelerated out-of-band tag, but fall back if + * required + */ + skb_reset_mac_header(skb); + if (vlan_get_tag(skb, &tci) < 0 && skb->len > VLAN_ETH_HLEN && + __vlan_get_tag(skb, &tci) == 0) { + is_ip = is_ip_proto(vlan_eth_hdr(skb)->h_vlan_encapsulated_proto); + skb_pull(skb, VLAN_ETH_HLEN); + } else { + is_ip = is_ip_proto(eth_hdr(skb)->h_proto); + skb_pull(skb, ETH_HLEN); + } + /* mapping VLANs to MBIM sessions: * no tag => IPS session <0> * 1 - 255 => IPS session * 256 - 511 => DSS session * 512 - 4095 => unsupported, drop */ - vlan_get_tag(skb, &tci); - switch (tci & 0x0f00) { case 0x0000: /* VLAN ID 0 - 255 */ - /* verify that datagram is IPv4 or IPv6 */ - skb_reset_mac_header(skb); - switch (eth_hdr(skb)->h_proto) { - case htons(ETH_P_IP): - case htons(ETH_P_IPV6): - break; - default: + if (!is_ip) goto error; - } c = (u8 *)&sign; c[3] = tci; break; @@ -169,7 +187,6 @@ static struct sk_buff *cdc_mbim_tx_fixup(struct usbnet *dev, struct sk_buff *skb "unsupported tci=0x%04x\n", tci); goto error; } - skb_pull(skb, ETH_HLEN); } spin_lock_bh(&ctx->mtx); From 0953f78971040fff09064bb564d9ac0cd1fb4e69 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 8 May 2014 10:09:20 +0200 Subject: [PATCH 053/178] net: mdio-gpio: fix device-tree binding documentation Fix aliases syntax in device-tree binding example to avoid copy-paste errors (the alias would be dropped silently). Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/mdio-gpio.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/mdio-gpio.txt b/Documentation/devicetree/bindings/net/mdio-gpio.txt index c79bab025369..8dbcf8295c6c 100644 --- a/Documentation/devicetree/bindings/net/mdio-gpio.txt +++ b/Documentation/devicetree/bindings/net/mdio-gpio.txt @@ -14,7 +14,7 @@ node. Example: aliases { - mdio-gpio0 = <&mdio0>; + mdio-gpio0 = &mdio0; }; mdio0: mdio { From 7f52da56f76f61112a9c1db41975376764828e71 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 8 May 2014 10:09:21 +0200 Subject: [PATCH 054/178] net: mdio-gpio: warn about missing bus alias id Use a sane default bus id (rather than -ENODEV) and print a warning when the bus alias id is missing. Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/phy/mdio-gpio.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c index 9c4defdec67b..5f1a2250018f 100644 --- a/drivers/net/phy/mdio-gpio.c +++ b/drivers/net/phy/mdio-gpio.c @@ -215,6 +215,10 @@ static int mdio_gpio_probe(struct platform_device *pdev) if (pdev->dev.of_node) { pdata = mdio_gpio_of_get_data(pdev); bus_id = of_alias_get_id(pdev->dev.of_node, "mdio-gpio"); + if (bus_id < 0) { + dev_warn(&pdev->dev, "failed to get alias id\n"); + bus_id = 0; + } } else { pdata = dev_get_platdata(&pdev->dev); bus_id = pdev->id; From 59993f48b38fd46863b23bb1bb1dc3291e7278fb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 8 May 2014 10:09:22 +0200 Subject: [PATCH 055/178] Revert "net: eth: cpsw: Correctly attach to GPIO bitbang MDIO driver" This reverts commit f8d56d8f892be43a2404356073e16401eb5a42e6 ("net: eth: cpsw: Correctly attach to GPIO bitbang MDIO driver"). Fix potential null-pointer dereference at probe if the mdio-gpio device has not been successfully probed yet. The offending commit is plain wrong for a number of reasons. First of all it accesses internal driver data of an unrelated device. Neither does it check that the data is non-null (which it is in case the device has not been probed yet). Furthermore, the decision on whether to treat any driver data according to the mdio-gpio driver's internals is made based on the node name. But the name is not compared against "mdio" which is the normal name for the node, but rather against "gpio" which the node does not have to be named (and shouldn't be according to the binding documentation). [ If this hack is to be kept out-of-tree it should at least be matching against the compatible property. ] Cc: Stefan Roese Cc: stable # v3.14 Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 36aa109416c4..18d83d8d7f74 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1871,18 +1871,8 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, mdio_node = of_find_node_by_phandle(be32_to_cpup(parp)); phyid = be32_to_cpup(parp+1); mdio = of_find_device_by_node(mdio_node); - - if (strncmp(mdio->name, "gpio", 4) == 0) { - /* GPIO bitbang MDIO driver attached */ - struct mii_bus *bus = dev_get_drvdata(&mdio->dev); - - snprintf(slave_data->phy_id, sizeof(slave_data->phy_id), - PHY_ID_FMT, bus->id, phyid); - } else { - /* davinci MDIO driver attached */ - snprintf(slave_data->phy_id, sizeof(slave_data->phy_id), - PHY_ID_FMT, mdio->name, phyid); - } + snprintf(slave_data->phy_id, sizeof(slave_data->phy_id), + PHY_ID_FMT, mdio->name, phyid); mac_addr = of_get_mac_address(slave_node); if (mac_addr) From 6954cc1f238199e971ec905c5cc87120806ac981 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 8 May 2014 10:09:23 +0200 Subject: [PATCH 056/178] net: cpsw: fix null dereference at probe Fix null-pointer dereference at probe when the mdio platform device is missing (e.g. when it has been disabled in DT). Cc: stable # v3.8 Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 18d83d8d7f74..db9360cd861c 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1871,6 +1871,10 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, mdio_node = of_find_node_by_phandle(be32_to_cpup(parp)); phyid = be32_to_cpup(parp+1); mdio = of_find_device_by_node(mdio_node); + if (!mdio) { + pr_err("Missing mdio platform device\n"); + return -EINVAL; + } snprintf(slave_data->phy_id, sizeof(slave_data->phy_id), PHY_ID_FMT, mdio->name, phyid); From 60e71ab56b5fbd839aaef4f4af7778d86e0206f0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 8 May 2014 10:09:24 +0200 Subject: [PATCH 057/178] net: cpsw: add missing of_node_put Add missing of_node_put to avoid kref leak. Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index db9360cd861c..c331b7ebc812 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1871,6 +1871,7 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, mdio_node = of_find_node_by_phandle(be32_to_cpup(parp)); phyid = be32_to_cpup(parp+1); mdio = of_find_device_by_node(mdio_node); + of_node_put(mdio_node); if (!mdio) { pr_err("Missing mdio platform device\n"); return -EINVAL; From de682941eef3e5f6d1b653a6c214bc8a288f17c1 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 8 May 2014 12:34:31 +0300 Subject: [PATCH 058/178] bnx2x: Fix UNDI driver unload Commit 91ebb928b "bnx2x: Add support for Multi-Function UNDI" contains a bug which prevent the emptying of the device's Rx buffers before reset. As a result, on new boards it is likely HW will reach some fatal assertion once its interfaces load after UNDI was previously loaded. Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index b260913db236..3b0d43154e67 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -10051,8 +10051,8 @@ static void bnx2x_prev_unload_close_mac(struct bnx2x *bp, #define BCM_5710_UNDI_FW_MF_MAJOR (0x07) #define BCM_5710_UNDI_FW_MF_MINOR (0x08) #define BCM_5710_UNDI_FW_MF_VERS (0x05) -#define BNX2X_PREV_UNDI_MF_PORT(p) (0x1a150c + ((p) << 4)) -#define BNX2X_PREV_UNDI_MF_FUNC(f) (0x1a184c + ((f) << 4)) +#define BNX2X_PREV_UNDI_MF_PORT(p) (BAR_TSTRORM_INTMEM + 0x150c + ((p) << 4)) +#define BNX2X_PREV_UNDI_MF_FUNC(f) (BAR_TSTRORM_INTMEM + 0x184c + ((f) << 4)) static bool bnx2x_prev_unload_undi_fw_supports_mf(struct bnx2x *bp) { u8 major, minor, version; @@ -10352,6 +10352,7 @@ static int bnx2x_prev_unload_common(struct bnx2x *bp) /* Reset should be performed after BRB is emptied */ if (reset_reg & MISC_REGISTERS_RESET_REG_1_RST_BRB1) { u32 timer_count = 1000; + bool need_write = true; /* Close the MAC Rx to prevent BRB from filling up */ bnx2x_prev_unload_close_mac(bp, &mac_vals); @@ -10398,7 +10399,10 @@ static int bnx2x_prev_unload_common(struct bnx2x *bp) * cleaning methods - might be redundant but harmless. */ if (bnx2x_prev_unload_undi_fw_supports_mf(bp)) { - bnx2x_prev_unload_undi_mf(bp); + if (need_write) { + bnx2x_prev_unload_undi_mf(bp); + need_write = false; + } } else if (prev_undi) { /* If UNDI resides in memory, * manually increment it From a9de0500083c18589ba2ea4543135c1bea8419ec Mon Sep 17 00:00:00 2001 From: Emil Goode Date: Fri, 9 May 2014 01:07:17 +0200 Subject: [PATCH 059/178] net: cassini: use nested lock annotation In the cas_lock_tx function we acquire multiple locks in a loop and need to use nested lock annotation to prevent lockdep warnings. Reported-by: Meelis Roos Signed-off-by: Emil Goode Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/cassini.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index df8d383acf48..b9ac20f42651 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -246,7 +246,7 @@ static inline void cas_lock_tx(struct cas *cp) int i; for (i = 0; i < N_TX_RINGS; i++) - spin_lock(&cp->tx_lock[i]); + spin_lock_nested(&cp->tx_lock[i], i); } static inline void cas_lock_all(struct cas *cp) From c1e517fbbcdb13f50662af4edc11c3251fe44f86 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 26 Mar 2014 15:46:21 +0100 Subject: [PATCH 060/178] batman-adv: fix neigh_ifinfo imbalance The neigh_ifinfo object must be freed if it has been used in batadv_iv_ogm_process_per_outif(). This is a regression introduced by 89652331c00f43574515059ecbf262d26d885717 ("batman-adv: split tq information in neigh_node struct") Reported-by: Antonio Quartulli Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_iv_ogm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index b3bd4ec3fd94..f04224c32005 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1545,6 +1545,8 @@ out_neigh: if ((orig_neigh_node) && (!is_single_hop_neigh)) batadv_orig_node_free_ref(orig_neigh_node); out: + if (router_ifinfo) + batadv_neigh_ifinfo_free_ref(router_ifinfo); if (router) batadv_neigh_node_free_ref(router); if (router_router) From 000c8dff97311357535d64539e58990526e4de70 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 26 Mar 2014 15:46:22 +0100 Subject: [PATCH 061/178] batman-adv: fix neigh reference imbalance When an interface is removed from batman-adv, the orig_ifinfo of a orig_node may be removed without releasing the router first. This will prevent the reference for the neighbor pointed at by the orig_ifinfo->router to be released, and this leak may result in reference leaks for the interface used by this neighbor. Fix that. This is a regression introduced by 7351a4822d42827ba0110677c0cbad88a3d52585 ("batman-adv: split out router from orig_node"). Reported-by: Antonio Quartulli Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/originator.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index ffd9dfbd9b0e..a43da6918512 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -501,12 +501,17 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node, static void batadv_orig_ifinfo_free_rcu(struct rcu_head *rcu) { struct batadv_orig_ifinfo *orig_ifinfo; + struct batadv_neigh_node *router; orig_ifinfo = container_of(rcu, struct batadv_orig_ifinfo, rcu); if (orig_ifinfo->if_outgoing != BATADV_IF_DEFAULT) batadv_hardif_free_ref_now(orig_ifinfo->if_outgoing); + /* this is the last reference to this object */ + router = rcu_dereference_protected(orig_ifinfo->router, true); + if (router) + batadv_neigh_node_free_ref_now(router); kfree(orig_ifinfo); } From 7b955a9fc164487d7c51acb9787f6d1b01b35ef6 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 26 Mar 2014 15:46:23 +0100 Subject: [PATCH 062/178] batman-adv: always run purge_orig_neighbors The current code will not execute batadv_purge_orig_neighbors() when an orig_ifinfo has already been purged. However we need to run it in any case. Fix that. This is a regression introduced by 7351a4822d42827ba0110677c0cbad88a3d52585 ("batman-adv: split out router from orig_node") Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/originator.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index a43da6918512..8104c3cf7741 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -862,7 +862,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, { struct batadv_neigh_node *best_neigh_node; struct batadv_hard_iface *hard_iface; - bool changed; + bool changed_ifinfo, changed_neigh; if (batadv_has_timed_out(orig_node->last_seen, 2 * BATADV_PURGE_TIMEOUT)) { @@ -872,10 +872,10 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, jiffies_to_msecs(orig_node->last_seen)); return true; } - changed = batadv_purge_orig_ifinfo(bat_priv, orig_node); - changed = changed || batadv_purge_orig_neighbors(bat_priv, orig_node); + changed_ifinfo = batadv_purge_orig_ifinfo(bat_priv, orig_node); + changed_neigh = batadv_purge_orig_neighbors(bat_priv, orig_node); - if (!changed) + if (!changed_ifinfo && !changed_neigh) return false; /* first for NULL ... */ From d088be8042841f024156ee68fecfef7503d660cb Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 10 May 2014 13:39:21 +0200 Subject: [PATCH 063/178] netfilter: nf_tables: reset rule number counter after jump and goto Otherwise we start incrementing the rule number counter from the previous chain iteration. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 804105391b9a..4368c5850548 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -123,7 +123,7 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) struct nft_data data[NFT_REG_MAX + 1]; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; - int rulenum = 0; + int rulenum; /* * Cache cursor to avoid problems in case that the cursor is updated * while traversing the ruleset. @@ -131,6 +131,7 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor); do_chain: + rulenum = 0; rule = list_entry(&chain->rules, struct nft_rule, list); next_rule: data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; From 709de13f0c532fe9c468c094aff069a725ed57fe Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 26 Mar 2014 15:46:24 +0100 Subject: [PATCH 064/178] batman-adv: fix removing neigh_ifinfo When an interface is removed separately, all neighbors need to be checked if they have a neigh_ifinfo structure for that particular interface. If that is the case, remove that ifinfo so any references to a hard interface can be freed. This is a regression introduced by 89652331c00f43574515059ecbf262d26d885717 ("batman-adv: split tq information in neigh_node struct") Reported-by: Antonio Quartulli Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/originator.c | 46 +++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 8104c3cf7741..1785da37b82c 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -706,6 +706,47 @@ free_orig_node: return NULL; } +/** + * batadv_purge_neigh_ifinfo - purge obsolete ifinfo entries from neighbor + * @bat_priv: the bat priv with all the soft interface information + * @neigh: orig node which is to be checked + */ +static void +batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv, + struct batadv_neigh_node *neigh) +{ + struct batadv_neigh_ifinfo *neigh_ifinfo; + struct batadv_hard_iface *if_outgoing; + struct hlist_node *node_tmp; + + spin_lock_bh(&neigh->ifinfo_lock); + + /* for all ifinfo objects for this neighinator */ + hlist_for_each_entry_safe(neigh_ifinfo, node_tmp, + &neigh->ifinfo_list, list) { + if_outgoing = neigh_ifinfo->if_outgoing; + + /* always keep the default interface */ + if (if_outgoing == BATADV_IF_DEFAULT) + continue; + + /* don't purge if the interface is not (going) down */ + if ((if_outgoing->if_status != BATADV_IF_INACTIVE) && + (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) && + (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)) + continue; + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "neighbor/ifinfo purge: neighbor %pM, iface: %s\n", + neigh->addr, if_outgoing->net_dev->name); + + hlist_del_rcu(&neigh_ifinfo->list); + batadv_neigh_ifinfo_free_ref(neigh_ifinfo); + } + + spin_unlock_bh(&neigh->ifinfo_lock); +} + /** * batadv_purge_orig_ifinfo - purge obsolete ifinfo entries from originator * @bat_priv: the bat priv with all the soft interface information @@ -805,6 +846,11 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, hlist_del_rcu(&neigh_node->list); batadv_neigh_node_free_ref(neigh_node); + } else { + /* only necessary if not the whole neighbor is to be + * deleted, but some interface has been removed. + */ + batadv_purge_neigh_ifinfo(bat_priv, neigh_node); } } From 1c4abec0baf25ffb92a28cc99d4231feeaa4d3f3 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 8 May 2014 09:48:10 +0300 Subject: [PATCH 065/178] iwlwifi: mvm: fix setting channel in monitor mode There was a deadlock in monitor mode when we were setting the channel if the channel was not 1. ====================================================== [ INFO: possible circular locking dependency detected ] 3.14.3 #4 Not tainted ------------------------------------------------------- iw/3323 is trying to acquire lock: (&local->chanctx_mtx){+.+.+.}, at: [] ieee80211_vif_release_channel+0x42/0xb0 [mac80211] but task is already holding lock: (&local->iflist_mtx){+.+...}, at: [] ieee80211_set_monitor_channel+0x5a/0x1b0 [mac80211] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&local->iflist_mtx){+.+...}: [] __lock_acquire+0xb3b/0x13b0 [] lock_acquire+0xb0/0x1f0 [] mutex_lock_nested+0x78/0x4f0 [] ieee80211_iterate_active_interfaces+0x2f/0x60 [mac80211] [] iwl_mvm_recalc_multicast+0x49/0xa0 [iwlmvm] [] iwl_mvm_configure_filter+0x4e/0x70 [iwlmvm] [] ieee80211_configure_filter+0x153/0x5f0 [mac80211] [] ieee80211_reconfig_filter+0x15/0x20 [mac80211] [snip] -> #1 (&mvm->mutex){+.+.+.}: [] __lock_acquire+0xb3b/0x13b0 [] lock_acquire+0xb0/0x1f0 [] mutex_lock_nested+0x78/0x4f0 [] iwl_mvm_add_chanctx+0x56/0xe0 [iwlmvm] [] ieee80211_new_chanctx+0x13e/0x410 [mac80211] [] ieee80211_vif_use_channel+0x1c3/0x5a0 [mac80211] [] ieee80211_add_virtual_monitor+0x1ab/0x6b0 [mac80211] [] ieee80211_do_open+0xe6a/0x15a0 [mac80211] [] ieee80211_open+0x59/0x60 [mac80211] [snip] -> #0 (&local->chanctx_mtx){+.+.+.}: [] check_prevs_add+0x977/0x980 [] __lock_acquire+0xb3b/0x13b0 [] lock_acquire+0xb0/0x1f0 [] mutex_lock_nested+0x78/0x4f0 [] ieee80211_vif_release_channel+0x42/0xb0 [mac80211] [] ieee80211_set_monitor_channel+0x113/0x1b0 [mac80211] [] cfg80211_set_monitor_channel+0x77/0x2b0 [cfg80211] [] __nl80211_set_channel+0x122/0x140 [cfg80211] [] nl80211_set_wiphy+0x284/0xaf0 [cfg80211] [snip] other info that might help us debug this: Chain exists of: &local->chanctx_mtx --> &mvm->mutex --> &local->iflist_mtx Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&local->iflist_mtx); lock(&mvm->mutex); lock(&local->iflist_mtx); lock(&local->chanctx_mtx); *** DEADLOCK *** This deadlock actually occurs: INFO: task iw:3323 blocked for more than 120 seconds. Not tainted 3.14.3 #4 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. iw D ffff8800c8afcd80 4192 3323 3322 0x00000000 ffff880078fdb7e0 0000000000000046 ffff8800c8afcd80 ffff880078fdbfd8 00000000001d5540 00000000001d5540 ffff8801141b0000 ffff8800c8afcd80 ffff880078ff9e38 ffff880078ff9e38 ffff880078ff9e40 0000000000000246 Call Trace: [] schedule_preempt_disabled+0x31/0x80 [] mutex_lock_nested+0x19d/0x4f0 [] ? ieee80211_iterate_active_interfaces+0x2f/0x60 [mac80211] [] ? ieee80211_iterate_active_interfaces+0x2f/0x60 [mac80211] [] ? iwl_mvm_power_mac_update_mode+0xc0/0xc0 [iwlmvm] [] ieee80211_iterate_active_interfaces+0x2f/0x60 [mac80211] [] _iwl_mvm_power_update_binding+0x27/0x80 [iwlmvm] [] iwl_mvm_unassign_vif_chanctx+0x81/0xc0 [iwlmvm] [] __ieee80211_vif_release_channel+0xdf/0x470 [mac80211] [] ieee80211_vif_release_channel+0x4a/0xb0 [mac80211] [] ieee80211_set_monitor_channel+0x113/0x1b0 [mac80211] [] cfg80211_set_monitor_channel+0x77/0x2b0 [cfg80211] [] __nl80211_set_channel+0x122/0x140 [cfg80211] [] nl80211_set_wiphy+0x284/0xaf0 [cfg80211] This fixes https://bugzilla.kernel.org/show_bug.cgi?id=75541 Cc: [3.13+] Reviewed-by: Johannes Berg Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index 593f723a74c4..4b0b8b6571ee 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -1007,7 +1007,7 @@ static void iwl_mvm_mc_iface_iterator(void *_data, u8 *mac, memcpy(cmd->bssid, vif->bss_conf.bssid, ETH_ALEN); len = roundup(sizeof(*cmd) + cmd->count * ETH_ALEN, 4); - ret = iwl_mvm_send_cmd_pdu(mvm, MCAST_FILTER_CMD, CMD_SYNC, len, cmd); + ret = iwl_mvm_send_cmd_pdu(mvm, MCAST_FILTER_CMD, CMD_ASYNC, len, cmd); if (ret) IWL_ERR(mvm, "mcast filter cmd error. ret=%d\n", ret); } @@ -1023,7 +1023,7 @@ static void iwl_mvm_recalc_multicast(struct iwl_mvm *mvm) if (WARN_ON_ONCE(!mvm->mcast_filter_cmd)) return; - ieee80211_iterate_active_interfaces( + ieee80211_iterate_active_interfaces_atomic( mvm->hw, IEEE80211_IFACE_ITER_NORMAL, iwl_mvm_mc_iface_iterator, &iter_data); } From 1a466ae96e9f749d02a73315a3e66375e61a61dd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 8 May 2014 14:54:42 -0700 Subject: [PATCH 066/178] ptp: fix kconfig dependency warnings Fix kconfig warnings: PTP_1588_CLOCK selects NET_PTP_CLASSIFY, which depends on NET, so PTP_1588_CLOCK should also depend on NET. PTP_1588_CLOCK_PCH selects PTP_1588_CLOCK so the former should depend on NET. warning: (IXP4XX_ETH && PTP_1588_CLOCK) selects NET_PTP_CLASSIFY which has unmet direct dependencies (NET) warning: (SFC && TILE_NET && BFIN_MAC_USE_HWSTAMP && TIGON3 && FEC && E1000E && IGB && IXGBE && I40E && MLX4_EN && SXGBE_ETH && STMMAC_ETH && TI_CPTS && PTP_1588_CLOCK_GIANFAR && PTP_1588_CLOCK_IXP46X && DP83640_PHY && PTP_1588_CLOCK_PCH) selects PTP_1588_CLOCK which has unmet direct dependencies (NET) [This warning is caused by the new 'depends on NET' in PTP_1588_CLOCK.] Signed-off-by: Randy Dunlap Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/ptp/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index 6963bdf54175..6aea373547f6 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -6,6 +6,7 @@ menu "PTP clock support" config PTP_1588_CLOCK tristate "PTP clock support" + depends on NET select PPS select NET_PTP_CLASSIFY help @@ -74,7 +75,7 @@ config DP83640_PHY config PTP_1588_CLOCK_PCH tristate "Intel PCH EG20T as PTP clock" depends on X86 || COMPILE_TEST - depends on HAS_IOMEM + depends on HAS_IOMEM && NET select PTP_1588_CLOCK help This driver adds support for using the PCH EG20T as a PTP From fd71143645a9958e437c8cf394be2c44a6acb23a Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Fri, 9 May 2014 23:43:40 +0200 Subject: [PATCH 067/178] vti6: Don't unregister pernet ops twice on init errors If we fail to register one of the xfrm protocol handlers we will unregister the pernet ops twice on the error exit path. This will probably lead to a kernel panic as the double deregistration leads to a double kfree(). Fix this by removing one of the calls to do it only once. Fixes: fa9ad96d49 ("vti6: Update the ipv6 side to use its own...") Signed-off-by: Mathias Krause Signed-off-by: Steffen Klassert --- net/ipv6/ip6_vti.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index b7c0f827140b..a51100379f4a 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1097,7 +1097,6 @@ static int __init vti6_tunnel_init(void) err = xfrm6_protocol_register(&vti_esp6_protocol, IPPROTO_ESP); if (err < 0) { - unregister_pernet_device(&vti6_net_ops); pr_err("%s: can't register vti6 protocol\n", __func__); goto out; @@ -1106,7 +1105,6 @@ static int __init vti6_tunnel_init(void) err = xfrm6_protocol_register(&vti_ah6_protocol, IPPROTO_AH); if (err < 0) { xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); - unregister_pernet_device(&vti6_net_ops); pr_err("%s: can't register vti6 protocol\n", __func__); goto out; @@ -1116,7 +1114,6 @@ static int __init vti6_tunnel_init(void) if (err < 0) { xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); - unregister_pernet_device(&vti6_net_ops); pr_err("%s: can't register vti6 protocol\n", __func__); goto out; From 6d004d6cc73920299adf4cfe25010b348fc94395 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 12 May 2014 09:09:26 +0200 Subject: [PATCH 068/178] vti: Use the tunnel mark for lookup in the error handlers. We need to use the mark we get from the tunnels o_key to lookup the right vti state in the error handlers. This patch ensures that. Fixes: df3893c1 ("vti: Update the ipv4 side to use it's own receive hook.") Fixes: fa9ad96d ("vti6: Update the ipv6 side to use its own receive hook.") Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 5 ++++- net/ipv6/ip6_vti.c | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index cd62596e9a87..d3f64d7f355e 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -239,6 +239,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) static int vti4_err(struct sk_buff *skb, u32 info) { __be32 spi; + __u32 mark; struct xfrm_state *x; struct ip_tunnel *tunnel; struct ip_esp_hdr *esph; @@ -254,6 +255,8 @@ static int vti4_err(struct sk_buff *skb, u32 info) if (!tunnel) return -1; + mark = be32_to_cpu(tunnel->parms.o_key); + switch (protocol) { case IPPROTO_ESP: esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2)); @@ -281,7 +284,7 @@ static int vti4_err(struct sk_buff *skb, u32 info) return 0; } - x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, + x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr, spi, protocol, AF_INET); if (!x) return 0; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index a51100379f4a..6cc9f9371cc5 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -511,6 +511,7 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { __be32 spi; + __u32 mark; struct xfrm_state *x; struct ip6_tnl *t; struct ip_esp_hdr *esph; @@ -524,6 +525,8 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!t) return -1; + mark = be32_to_cpu(t->parms.o_key); + switch (protocol) { case IPPROTO_ESP: esph = (struct ip_esp_hdr *)(skb->data + offset); @@ -545,7 +548,7 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, type != NDISC_REDIRECT) return 0; - x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, + x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr, spi, protocol, AF_INET6); if (!x) return 0; From 5467a512216753d54f757314c73dbc60f659f9e6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 10 May 2014 18:33:11 +0200 Subject: [PATCH 069/178] netfilter: nf_tables: fix goto action This patch fixes a crash when trying to access the counters and the default chain policy from the non-base chain that we have reached via the goto chain. Fix this by falling back on the original base chain after returning from the custom chain. While fixing this, kill the inline function to account chain statistics to improve source code readability. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 4368c5850548..7d83a49fd8e5 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -66,20 +66,6 @@ struct nft_jumpstack { int rulenum; }; -static inline void -nft_chain_stats(const struct nft_chain *this, const struct nft_pktinfo *pkt, - struct nft_jumpstack *jumpstack, unsigned int stackptr) -{ - struct nft_stats __percpu *stats; - const struct nft_chain *chain = stackptr ? jumpstack[0].chain : this; - - rcu_read_lock_bh(); - stats = rcu_dereference(nft_base_chain(chain)->stats); - __this_cpu_inc(stats->pkts); - __this_cpu_add(stats->bytes, pkt->skb->len); - rcu_read_unlock_bh(); -} - enum nft_trace { NFT_TRACE_RULE, NFT_TRACE_RETURN, @@ -117,12 +103,13 @@ static void nft_trace_packet(const struct nft_pktinfo *pkt, unsigned int nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) { - const struct nft_chain *chain = ops->priv; + const struct nft_chain *chain = ops->priv, *basechain = chain; const struct nft_rule *rule; const struct nft_expr *expr, *last; struct nft_data data[NFT_REG_MAX + 1]; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; + struct nft_stats __percpu *stats; int rulenum; /* * Cache cursor to avoid problems in case that the cursor is updated @@ -209,12 +196,17 @@ next_rule: rulenum = jumpstack[stackptr].rulenum; goto next_rule; } - nft_chain_stats(chain, pkt, jumpstack, stackptr); if (unlikely(pkt->skb->nf_trace)) - nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_POLICY); + nft_trace_packet(pkt, basechain, ++rulenum, NFT_TRACE_POLICY); - return nft_base_chain(chain)->policy; + rcu_read_lock_bh(); + stats = rcu_dereference(nft_base_chain(basechain)->stats); + __this_cpu_inc(stats->pkts); + __this_cpu_add(stats->bytes, pkt->skb->len); + rcu_read_unlock_bh(); + + return nft_base_chain(basechain)->policy; } EXPORT_SYMBOL_GPL(nft_do_chain); From 7b9d5ef932297413adcbd8be98fe612b9527a312 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 10 May 2014 18:42:57 +0200 Subject: [PATCH 070/178] netfilter: nf_tables: fix tracing of the goto action Add missing code to trace goto actions. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 7d83a49fd8e5..f55fb28264fa 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -171,8 +171,12 @@ next_rule: jumpstack[stackptr].rule = rule; jumpstack[stackptr].rulenum = rulenum; stackptr++; - /* fall through */ + chain = data[NFT_REG_VERDICT].chain; + goto do_chain; case NFT_GOTO: + if (unlikely(pkt->skb->nf_trace)) + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + chain = data[NFT_REG_VERDICT].chain; goto do_chain; case NFT_RETURN: From f7e7e39b21c285ad73a62fac0736191b8d830704 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 10 May 2014 18:46:02 +0200 Subject: [PATCH 071/178] netfilter: nf_tables: fix bogus rulenum after goto action After returning from the chain that we just went to with no matchings, we get a bogus rule number in the trace. To fix this, we would need to iterate over the list of remaining rules in the chain to update the rule number counter. Patrick suggested to set this to the maximum value since the default base chain policy is the very last action when the processing the base chain is over. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index f55fb28264fa..be08a96b4f45 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -202,7 +202,7 @@ next_rule: } if (unlikely(pkt->skb->nf_trace)) - nft_trace_packet(pkt, basechain, ++rulenum, NFT_TRACE_POLICY); + nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY); rcu_read_lock_bh(); stats = rcu_dereference(nft_base_chain(basechain)->stats); From 7e9bc10db275b22a9db0f976b33b5aeed288da73 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 11 May 2014 17:14:49 +0200 Subject: [PATCH 072/178] netfilter: nf_tables: fix missing return trace at the end of non-base chain Display "return" for implicit rule at the end of a non-base chain, instead of when popping chain from the stack. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index be08a96b4f45..421c36ac5145 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -182,18 +182,16 @@ next_rule: case NFT_RETURN: if (unlikely(pkt->skb->nf_trace)) nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN); - - /* fall through */ + break; case NFT_CONTINUE: + if (unlikely(pkt->skb->nf_trace && !(chain->flags & NFT_BASE_CHAIN))) + nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN); break; default: WARN_ON(1); } if (stackptr > 0) { - if (unlikely(pkt->skb->nf_trace)) - nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN); - stackptr--; chain = jumpstack[stackptr].chain; rule = jumpstack[stackptr].rule; From 03e5da151b8e390f2e28c4edf8fbbb6ca6d7a7ed Mon Sep 17 00:00:00 2001 From: Daniel Kim Date: Fri, 9 May 2014 12:37:05 +0200 Subject: [PATCH 073/178] brcmfmac: Fix iovar 'bw_cap' set command failure Fix iovar 'bw_cap' set command failure introduced by commit ff3b0fba6f25555ef59c55d138a467d0f81d82d7 Author: Arend van Spriel Date: Sat Mar 15 12:00:57 2014 +0100 brcmfmac: fallback to mimo_bw_cap for older firmwares This resulted in disabling 20MHz operation in the firmware. Reviewed-by: Arend Van Spriel Signed-off-by: Daniel Kim Signed-off-by: Arend van Spriel Signed-off-by: John W. Linville --- drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c index afb3d15e38ff..be1985296bdc 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c @@ -4948,7 +4948,7 @@ static int brcmf_enable_bw40_2g(struct brcmf_if *ifp) if (!err) { /* only set 2G bandwidth using bw_cap command */ band_bwcap.band = cpu_to_le32(WLC_BAND_2G); - band_bwcap.bw_cap = cpu_to_le32(WLC_BW_40MHZ_BIT); + band_bwcap.bw_cap = cpu_to_le32(WLC_BW_CAP_40MHZ); err = brcmf_fil_iovar_data_set(ifp, "bw_cap", &band_bwcap, sizeof(band_bwcap)); } else { From bbeb0eadcf9fe74fb2b9b1a6fea82cd538b1e556 Mon Sep 17 00:00:00 2001 From: Peter Christensen Date: Thu, 8 May 2014 11:15:37 +0200 Subject: [PATCH 074/178] macvlan: Don't propagate IFF_ALLMULTI changes on down interfaces. Clearing the IFF_ALLMULTI flag on a down interface could cause an allmulti overflow on the underlying interface. Attempting the set IFF_ALLMULTI on the underlying interface would cause an error and the log message: "allmulti touches root, set allmulti failed." Signed-off-by: Peter Christensen Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index b0e2865a6810..c5fb9cf95c12 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -458,8 +458,10 @@ static void macvlan_change_rx_flags(struct net_device *dev, int change) struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *lowerdev = vlan->lowerdev; - if (change & IFF_ALLMULTI) - dev_set_allmulti(lowerdev, dev->flags & IFF_ALLMULTI ? 1 : -1); + if (dev->flags & IFF_UP) { + if (change & IFF_ALLMULTI) + dev_set_allmulti(lowerdev, dev->flags & IFF_ALLMULTI ? 1 : -1); + } } static void macvlan_set_mac_lists(struct net_device *dev) From 1c3639005f48492e5f2d965779efd814e80f8b15 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 9 May 2014 11:11:39 +0200 Subject: [PATCH 075/178] sfc: fix calling of free_irq with already free vector If the sfc driver is in legacy interrupt mode (either explicitly by using interrupt_mode module param or by falling back to it) it will hit a warning at kernel/irq/manage.c because it will try to free an irq which wasn't allocated by it in the first place because the MSI(X) irqs are zero and it'll try to free them unconditionally. So fix it by checking if we're in legacy mode and freeing the appropriate irqs. CC: Zenghui Shi CC: Ben Hutchings CC: CC: Shradha Shah CC: David S. Miller Fixes: 1899c111a535 ("sfc: Fix IRQ cleanup in case of a probe failure") Reported-by: Zenghui Shi Signed-off-by: Nikolay Aleksandrov Acked-by: Shradha Shah Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/nic.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c index 32d969e857f7..89b83e59e1dc 100644 --- a/drivers/net/ethernet/sfc/nic.c +++ b/drivers/net/ethernet/sfc/nic.c @@ -156,13 +156,15 @@ void efx_nic_fini_interrupt(struct efx_nic *efx) efx->net_dev->rx_cpu_rmap = NULL; #endif - /* Disable MSI/MSI-X interrupts */ - efx_for_each_channel(channel, efx) - free_irq(channel->irq, &efx->msi_context[channel->channel]); - - /* Disable legacy interrupt */ - if (efx->legacy_irq) + if (EFX_INT_MODE_USE_MSI(efx)) { + /* Disable MSI/MSI-X interrupts */ + efx_for_each_channel(channel, efx) + free_irq(channel->irq, + &efx->msi_context[channel->channel]); + } else { + /* Disable legacy interrupt */ free_irq(efx->legacy_irq, efx); + } } /* Register dump */ From c8965932a2e3b70197ec02c6741c29460279e2a8 Mon Sep 17 00:00:00 2001 From: Susant Sahani Date: Sat, 10 May 2014 00:11:32 +0530 Subject: [PATCH 076/178] ip6_tunnel: fix potential NULL pointer dereference The function ip6_tnl_validate assumes that the rtnl attribute IFLA_IPTUN_PROTO always be filled . If this attribute is not filled by the userspace application kernel get crashed with NULL pointer dereference. This patch fixes the potential kernel crash when IFLA_IPTUN_PROTO is missing . Signed-off-by: Susant Sahani Acked-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index b05b609f69d1..f6a66bb4114d 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1557,7 +1557,7 @@ static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[]) { u8 proto; - if (!data) + if (!data || !data[IFLA_IPTUN_PROTO]) return 0; proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); From 64793110ad4d82e18d88a33307749c6562a6dd04 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Sun, 11 May 2014 19:59:43 +0300 Subject: [PATCH 077/178] iwlwifi: mvm: fix off-by-one in scan channels configuration tail should be equal to the last valid index, so decrease it by one. This error causes in "a gap" in some cases (as well as some possible out-of-bound write), finally resulting in ucode assertion. Signed-off-by: Eliad Peller Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/scan.c b/drivers/net/wireless/iwlwifi/mvm/scan.c index cba88a379fc8..c28de54c75d4 100644 --- a/drivers/net/wireless/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/iwlwifi/mvm/scan.c @@ -732,7 +732,7 @@ int iwl_mvm_config_sched_scan(struct iwl_mvm *mvm, int band_2ghz = mvm->nvm_data->bands[IEEE80211_BAND_2GHZ].n_channels; int band_5ghz = mvm->nvm_data->bands[IEEE80211_BAND_5GHZ].n_channels; int head = 0; - int tail = band_2ghz + band_5ghz; + int tail = band_2ghz + band_5ghz - 1; u32 ssid_bitmap; int cmd_len; int ret; From c52666aef9f2dff39276eb53f15d99e2e229870f Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 13 May 2014 12:54:09 +0300 Subject: [PATCH 078/178] mac80211: fix suspend vs. association race If the association is in progress while we suspend, the stack will be in a messed up state. Clean it before we suspend. This patch completes Johannes's patch: 1a1cb744de160ee70086a77afff605bbc275d291 Author: Johannes Berg mac80211: fix suspend vs. authentication race Cc: Fixes: 12e7f517029d ("mac80211: cleanup generic suspend/resume procedures") Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index dee50aefd6e8..27600a9808ba 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3598,18 +3598,24 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata) sdata_lock(sdata); - if (ifmgd->auth_data) { + if (ifmgd->auth_data || ifmgd->assoc_data) { + const u8 *bssid = ifmgd->auth_data ? + ifmgd->auth_data->bss->bssid : + ifmgd->assoc_data->bss->bssid; + /* - * If we are trying to authenticate while suspending, cfg80211 - * won't know and won't actually abort those attempts, thus we - * need to do that ourselves. + * If we are trying to authenticate / associate while suspending, + * cfg80211 won't know and won't actually abort those attempts, + * thus we need to do that ourselves. */ - ieee80211_send_deauth_disassoc(sdata, - ifmgd->auth_data->bss->bssid, + ieee80211_send_deauth_disassoc(sdata, bssid, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DEAUTH_LEAVING, false, frame_buf); - ieee80211_destroy_auth_data(sdata, false); + if (ifmgd->assoc_data) + ieee80211_destroy_assoc_data(sdata, false); + if (ifmgd->auth_data) + ieee80211_destroy_auth_data(sdata, false); cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); } From b538b8ce76f69f7fa225bc0817bbb361b877ea23 Mon Sep 17 00:00:00 2001 From: David Spinadel Date: Tue, 13 May 2014 14:29:36 +0300 Subject: [PATCH 079/178] iwlwifi: mvm: prevent sched scan while not idle Prevent sched scan while not idle (including during association or in AP mode) instead of while associated only. This fixes my previous commit which was incomplete: commit bd5e4744a6ca64299b57a2682c720d00a475a734 Author: David Spinadel Date: Thu Apr 24 13:15:29 2014 +0300 iwlwifi: mvm: do no sched scan while associated Currently the FW doesn't support sched scan while associated, Prevent it. Signed-off-by: David Spinadel Reviewed-by: Johannes Berg Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 2 +- drivers/net/wireless/iwlwifi/mvm/mvm.h | 2 +- drivers/net/wireless/iwlwifi/mvm/utils.c | 16 ++++++++-------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index 4b0b8b6571ee..b41dc84e9431 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -1807,7 +1807,7 @@ static int iwl_mvm_mac_sched_scan_start(struct ieee80211_hw *hw, mutex_lock(&mvm->mutex); - if (iwl_mvm_is_associated(mvm)) { + if (!iwl_mvm_is_idle(mvm)) { ret = -EBUSY; goto out; } diff --git a/drivers/net/wireless/iwlwifi/mvm/mvm.h b/drivers/net/wireless/iwlwifi/mvm/mvm.h index 84c75a1b267e..f1ec0986c3c9 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/iwlwifi/mvm/mvm.h @@ -1004,7 +1004,7 @@ static inline bool iwl_mvm_vif_low_latency(struct iwl_mvm_vif *mvmvif) } /* Assoc status */ -bool iwl_mvm_is_associated(struct iwl_mvm *mvm); +bool iwl_mvm_is_idle(struct iwl_mvm *mvm); /* Thermal management and CT-kill */ void iwl_mvm_tt_tx_backoff(struct iwl_mvm *mvm, u32 backoff); diff --git a/drivers/net/wireless/iwlwifi/mvm/utils.c b/drivers/net/wireless/iwlwifi/mvm/utils.c index 6fdbef9696d8..2180902266ae 100644 --- a/drivers/net/wireless/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/iwlwifi/mvm/utils.c @@ -645,21 +645,21 @@ bool iwl_mvm_low_latency(struct iwl_mvm *mvm) return result; } -static void iwl_mvm_assoc_iter(void *_data, u8 *mac, struct ieee80211_vif *vif) +static void iwl_mvm_idle_iter(void *_data, u8 *mac, struct ieee80211_vif *vif) { - bool *assoc = _data; + bool *idle = _data; - if (vif->bss_conf.assoc) - *assoc = true; + if (!vif->bss_conf.idle) + *idle = false; } -bool iwl_mvm_is_associated(struct iwl_mvm *mvm) +bool iwl_mvm_is_idle(struct iwl_mvm *mvm) { - bool assoc = false; + bool idle = true; ieee80211_iterate_active_interfaces_atomic( mvm->hw, IEEE80211_IFACE_ITER_NORMAL, - iwl_mvm_assoc_iter, &assoc); + iwl_mvm_idle_iter, &idle); - return assoc; + return idle; } From f5651986fe438c1ba05fdafa383d38bd86713d13 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 May 2014 15:45:55 +0200 Subject: [PATCH 080/178] nl80211: fix NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL API My commit removing that also removed it from the header file which can break compilation of userspace that needed it, add it back for API/ABI compatibility purposes (but no code to implement anything for it.) Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 1ba9d626aa83..194c1eab04d8 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3856,6 +3856,8 @@ enum nl80211_ap_sme_features { * @NL80211_FEATURE_CELL_BASE_REG_HINTS: This driver has been tested * to work properly to suppport receiving regulatory hints from * cellular base stations. + * @NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL: (no longer available, only + * here to reserve the value for API/ABI compatibility) * @NL80211_FEATURE_SAE: This driver supports simultaneous authentication of * equals (SAE) with user space SME (NL80211_CMD_AUTHENTICATE) in station * mode @@ -3897,7 +3899,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_HT_IBSS = 1 << 1, NL80211_FEATURE_INACTIVITY_TIMER = 1 << 2, NL80211_FEATURE_CELL_BASE_REG_HINTS = 1 << 3, - /* bit 4 is reserved - don't use */ + NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL = 1 << 4, NL80211_FEATURE_SAE = 1 << 5, NL80211_FEATURE_LOW_PRIORITY_SCAN = 1 << 6, NL80211_FEATURE_SCAN_FLUSH = 1 << 7, From 2176d5d41891753774f648b67470398a5acab584 Mon Sep 17 00:00:00 2001 From: Duan Jiong Date: Fri, 9 May 2014 13:16:48 +0800 Subject: [PATCH 081/178] neigh: set nud_state to NUD_INCOMPLETE when probing router reachability Since commit 7e98056964("ipv6: router reachability probing"), a router falls into NUD_FAILED will be probed. Now if function rt6_select() selects a router which neighbour state is NUD_FAILED, and at the same time function rt6_probe() changes the neighbour state to NUD_PROBE, then function dst_neigh_output() can directly send packets, but actually the neighbour still is unreachable. If we set nud_state to NUD_INCOMPLETE instead NUD_PROBE, packets will not be sent out until the neihbour is reachable. In addition, because the route should be probes with a single NS, so we must set neigh->probes to neigh_max_probes(), then the neigh timer timeout and function neigh_timer_handler() will not send other NS Messages. Signed-off-by: Duan Jiong Signed-off-by: David S. Miller --- net/core/neighbour.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8f8a96ef9f3f..32d872eec7f5 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1248,8 +1248,8 @@ void __neigh_set_probe_once(struct neighbour *neigh) neigh->updated = jiffies; if (!(neigh->nud_state & NUD_FAILED)) return; - neigh->nud_state = NUD_PROBE; - atomic_set(&neigh->probes, NEIGH_VAR(neigh->parms, UCAST_PROBES)); + neigh->nud_state = NUD_INCOMPLETE; + atomic_set(&neigh->probes, neigh_max_probes(neigh)); neigh_add_timer(neigh, jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME)); } From 773cd38f40b8834be991dbfed36683acc1dd41ee Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 13 May 2014 15:05:55 -0700 Subject: [PATCH 082/178] net: filter: x86: fix JIT address randomization bpf_alloc_binary() adds 128 bytes of room to JITed program image and rounds it up to the nearest page size. If image size is close to page size (like 4000), it is rounded to two pages: round_up(4000 + 4 + 128) == 8192 then 'hole' is computed as 8192 - (4000 + 4) = 4188 If prandom_u32() % hole selects a number >= PAGE_SIZE - sizeof(*header) then kernel will crash during bpf_jit_free(): kernel BUG at arch/x86/mm/pageattr.c:887! Call Trace: [] change_page_attr_set_clr+0x135/0x460 [] ? _raw_spin_unlock_irq+0x30/0x50 [] set_memory_rw+0x2f/0x40 [] bpf_jit_free_deferred+0x2d/0x60 [] process_one_work+0x1d8/0x6a0 [] ? process_one_work+0x178/0x6a0 [] worker_thread+0x11c/0x370 since bpf_jit_free() does: unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; struct bpf_binary_header *header = (void *)addr; to compute start address of 'bpf_binary_header' and header->pages will pass junk to: set_memory_rw(addr, header->pages); Fix it by making sure that &header->image[prandom_u32() % hole] and &header are in the same page Fixes: 314beb9bcabfd ("x86: bpf_jit_comp: secure bpf jit against spraying attacks") Signed-off-by: Alexei Starovoitov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- arch/x86/net/bpf_jit_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index dc017735bb91..6d5663a599a7 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -171,7 +171,7 @@ static struct bpf_binary_header *bpf_alloc_binary(unsigned int proglen, memset(header, 0xcc, sz); /* fill whole space with int3 instructions */ header->pages = sz / PAGE_SIZE; - hole = sz - (proglen + sizeof(*header)); + hole = min(sz - (proglen + sizeof(*header)), PAGE_SIZE - sizeof(*header)); /* insert a random number of int3 instructions before BPF code */ *image_ptr = &header->image[prandom_u32() % hole]; From 3d4405226d27b3a215e4d03cfa51f536244e5de7 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sun, 11 May 2014 22:59:30 +0200 Subject: [PATCH 083/178] net: avoid dependency of net_get_random_once on nop patching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net_get_random_once depends on the static keys infrastructure to patch up the branch to the slow path during boot. This was realized by abusing the static keys api and defining a new initializer to not enable the call site while still indicating that the branch point should get patched up. This was needed to have the fast path considered likely by gcc. The static key initialization during boot up normally walks through all the registered keys and either patches in ideal nops or enables the jump site but omitted that step on x86 if ideal nops where already placed at static_key branch points. Thus net_get_random_once branches not always became active. This patch switches net_get_random_once to the ordinary static_key api and thus places the kernel fast path in the - by gcc considered - unlikely path. Microbenchmarks on Intel and AMD x86-64 showed that the unlikely path actually beats the likely path in terms of cycle cost and that different nop patterns did not make much difference, thus this switch should not be noticeable. Fixes: a48e42920ff38b ("net: introduce new macro net_get_random_once") Reported-by: Tuomas Räsänen Cc: Linus Torvalds Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/net.h | 15 ++++----------- net/core/utils.c | 8 ++++---- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/include/linux/net.h b/include/linux/net.h index 94734a6259a4..17d83393afcc 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -248,24 +248,17 @@ do { \ bool __net_get_random_once(void *buf, int nbytes, bool *done, struct static_key *done_key); -#ifdef HAVE_JUMP_LABEL -#define ___NET_RANDOM_STATIC_KEY_INIT ((struct static_key) \ - { .enabled = ATOMIC_INIT(0), .entries = (void *)1 }) -#else /* !HAVE_JUMP_LABEL */ -#define ___NET_RANDOM_STATIC_KEY_INIT STATIC_KEY_INIT_FALSE -#endif /* HAVE_JUMP_LABEL */ - #define net_get_random_once(buf, nbytes) \ ({ \ bool ___ret = false; \ static bool ___done = false; \ - static struct static_key ___done_key = \ - ___NET_RANDOM_STATIC_KEY_INIT; \ - if (!static_key_true(&___done_key)) \ + static struct static_key ___once_key = \ + STATIC_KEY_INIT_TRUE; \ + if (static_key_true(&___once_key)) \ ___ret = __net_get_random_once(buf, \ nbytes, \ &___done, \ - &___done_key); \ + &___once_key); \ ___ret; \ }) diff --git a/net/core/utils.c b/net/core/utils.c index 2f737bf90b3f..eed34338736c 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -348,8 +348,8 @@ static void __net_random_once_deferred(struct work_struct *w) { struct __net_random_once_work *work = container_of(w, struct __net_random_once_work, work); - if (!static_key_enabled(work->key)) - static_key_slow_inc(work->key); + BUG_ON(!static_key_enabled(work->key)); + static_key_slow_dec(work->key); kfree(work); } @@ -367,7 +367,7 @@ static void __net_random_once_disable_jump(struct static_key *key) } bool __net_get_random_once(void *buf, int nbytes, bool *done, - struct static_key *done_key) + struct static_key *once_key) { static DEFINE_SPINLOCK(lock); unsigned long flags; @@ -382,7 +382,7 @@ bool __net_get_random_once(void *buf, int nbytes, bool *done, *done = true; spin_unlock_irqrestore(&lock, flags); - __net_random_once_disable_jump(done_key); + __net_random_once_disable_jump(once_key); return true; } From 3a1cebe7e05027a1c96f2fc1a8eddf5f19b78f42 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sun, 11 May 2014 23:01:13 +0200 Subject: [PATCH 084/178] ipv6: fix calculation of option len in ip6_append_data tot_len does specify the size of struct ipv6_txoptions. We need opt_flen + opt_nflen to calculate the overall length of additional ipv6 extensions. I found this while auditing the ipv6 output path for a memory corruption reported by Alexey Preobrazhensky while he fuzzed an instrumented AddressSanitizer kernel with trinity. This may or may not be the cause of the original bug. Fixes: 4df98e76cde7c6 ("ipv6: pmtudisc setting not respected with UFO/CORK") Reported-by: Alexey Preobrazhensky Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 31a38bde69ef..fbf11562b54c 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1229,7 +1229,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, unsigned int maxnonfragsize, headersize; headersize = sizeof(struct ipv6hdr) + - (opt ? opt->tot_len : 0) + + (opt ? opt->opt_flen + opt->opt_nflen : 0) + (dst_allfrag(&rt->dst) ? sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; From b4b177a5556a686909e643f1e9b6434c10de079f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 May 2014 15:34:41 +0200 Subject: [PATCH 085/178] mac80211: fix on-channel remain-on-channel Jouni reported that if a remain-on-channel was active on the same channel as the current operating channel, then the ROC would start, but any frames transmitted using mgmt-tx on the same channel would get delayed until after the ROC. The reason for this is that the ROC starts, but doesn't have any handling for "remain on the same channel", so it stops the interface queues. The later mgmt-tx then puts the frame on the interface queues (since it's on the current operating channel) and thus they get delayed until after the ROC. To fix this, add some logic to handle remaining on the same channel specially and not stop the queues etc. in this case. This not only fixes the bug but also improves behaviour in this case as data frames etc. can continue to flow. Cc: stable@vger.kernel.org Reported-by: Jouni Malinen Tested-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 1 + net/mac80211/offchannel.c | 27 ++++++++++++++++++++------- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 222c28b75315..f169b6ee94ee 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -317,6 +317,7 @@ struct ieee80211_roc_work { bool started, abort, hw_begun, notified; bool to_be_freed; + bool on_channel; unsigned long hw_start_time; diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 6fb38558a5e6..7a17decd27f9 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -333,7 +333,7 @@ void ieee80211_sw_roc_work(struct work_struct *work) container_of(work, struct ieee80211_roc_work, work.work); struct ieee80211_sub_if_data *sdata = roc->sdata; struct ieee80211_local *local = sdata->local; - bool started; + bool started, on_channel; mutex_lock(&local->mtx); @@ -354,14 +354,26 @@ void ieee80211_sw_roc_work(struct work_struct *work) if (!roc->started) { struct ieee80211_roc_work *dep; - /* start this ROC */ - ieee80211_offchannel_stop_vifs(local); + WARN_ON(local->use_chanctx); - /* switch channel etc */ + /* If actually operating on the desired channel (with at least + * 20 MHz channel width) don't stop all the operations but still + * treat it as though the ROC operation started properly, so + * other ROC operations won't interfere with this one. + */ + roc->on_channel = roc->chan == local->_oper_chandef.chan && + local->_oper_chandef.width != NL80211_CHAN_WIDTH_5 && + local->_oper_chandef.width != NL80211_CHAN_WIDTH_10; + + /* start this ROC */ ieee80211_recalc_idle(local); - local->tmp_channel = roc->chan; - ieee80211_hw_config(local, 0); + if (!roc->on_channel) { + ieee80211_offchannel_stop_vifs(local); + + local->tmp_channel = roc->chan; + ieee80211_hw_config(local, 0); + } /* tell userspace or send frame */ ieee80211_handle_roc_started(roc); @@ -380,9 +392,10 @@ void ieee80211_sw_roc_work(struct work_struct *work) finish: list_del(&roc->list); started = roc->started; + on_channel = roc->on_channel; ieee80211_roc_notify_destroy(roc, !roc->abort); - if (started) { + if (started && !on_channel) { ieee80211_flush_queues(local, NULL); local->tmp_channel = NULL; From c4b160685fc85e41fe8c08478cc61f4877d26973 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 12 May 2014 10:38:18 -0400 Subject: [PATCH 086/178] jme: Fix unmap loop counting error: In my recent fix (76a691d0a: fix dma unmap warning), Ben Hutchings noted that my loop count was incorrect. Where j started at startidx, it should have started at zero, and gone on for count entries, not to endidx. Additionally, a DMA resource exhaustion should drop the frame and (for now), return NETDEV_TX_OK, not NETEV_TX_BUSY. This patch fixes both of those issues: Signed-off-by: Neil Horman CC: Ben Hutchings CC: "David S. Miller" CC: Guo-Fu Tseng Signed-off-by: David S. Miller --- drivers/net/ethernet/jme.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 6e664d9038d6..b78378cea5e3 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -2027,14 +2027,14 @@ jme_fill_tx_map(struct pci_dev *pdev, return 0; } -static void jme_drop_tx_map(struct jme_adapter *jme, int startidx, int endidx) +static void jme_drop_tx_map(struct jme_adapter *jme, int startidx, int count) { struct jme_ring *txring = &(jme->txring[0]); struct jme_buffer_info *txbi = txring->bufinf, *ctxbi; int mask = jme->tx_ring_mask; int j; - for (j = startidx ; j < endidx ; ++j) { + for (j = 0 ; j < count ; j++) { ctxbi = txbi + ((startidx + j + 2) & (mask)); pci_unmap_page(jme->pdev, ctxbi->mapping, @@ -2069,7 +2069,7 @@ jme_map_tx_skb(struct jme_adapter *jme, struct sk_buff *skb, int idx) skb_frag_page(frag), frag->page_offset, skb_frag_size(frag), hidma); if (ret) { - jme_drop_tx_map(jme, idx, idx+i); + jme_drop_tx_map(jme, idx, i); goto out; } @@ -2081,7 +2081,7 @@ jme_map_tx_skb(struct jme_adapter *jme, struct sk_buff *skb, int idx) ret = jme_fill_tx_map(jme->pdev, ctxdesc, ctxbi, virt_to_page(skb->data), offset_in_page(skb->data), len, hidma); if (ret) - jme_drop_tx_map(jme, idx, idx+i); + jme_drop_tx_map(jme, idx, i); out: return ret; @@ -2269,7 +2269,7 @@ jme_start_xmit(struct sk_buff *skb, struct net_device *netdev) } if (jme_fill_tx_desc(jme, skb, idx)) - return NETDEV_TX_BUSY; + return NETDEV_TX_OK; jwrite32(jme, JME_TXCS, jme->reg_txcs | TXCS_SELECT_QUEUE0 | From 03a58baa785f48a85126ab043a14cb80b7e670e0 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Tue, 13 May 2014 14:03:11 +0530 Subject: [PATCH 087/178] be2net: enable interrupts in EEH resume On some BE3 FW versions, after a HW reset, interrupts will remain disabled for each function. So, explicitly enable the interrupts in the eeh_resume handler, else after an eeh recovery interrupts wouldn't work. Signed-off-by: Kalesh AP Signed-off-by: Sathya Perla Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index a18645407d21..dc19bc5dec77 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4949,6 +4949,12 @@ static void be_eeh_resume(struct pci_dev *pdev) if (status) goto err; + /* On some BE3 FW versions, after a HW reset, + * interrupts will remain disabled for each function. + * So, explicitly enable interrupts + */ + be_intr_set(adapter, true); + /* tell fw we're ready to fire cmds */ status = be_cmd_fw_init(adapter); if (status) From f5738e2ef88070ef1372e6e718124d88e9abe4ac Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Tue, 13 May 2014 14:38:02 +0200 Subject: [PATCH 088/178] af_iucv: wrong mapping of sent and confirmed skbs When sending data through IUCV a MESSAGE COMPLETE interrupt signals that sent data memory can be freed or reused again. With commit f9c41a62bba3f3f7ef3541b2a025e3371bcbba97 "af_iucv: fix recvmsg by replacing skb_pull() function" the MESSAGE COMPLETE callback iucv_callback_txdone() identifies the wrong skb as being confirmed, which leads to data corruption. This patch fixes the skb mapping logic in iucv_callback_txdone(). Signed-off-by: Ursula Braun Signed-off-by: Frank Blaschka Cc: Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 01e77b0ae075..8c9d7302c846 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1830,7 +1830,7 @@ static void iucv_callback_txdone(struct iucv_path *path, spin_lock_irqsave(&list->lock, flags); while (list_skb != (struct sk_buff *)list) { - if (msg->tag != IUCV_SKB_CB(list_skb)->tag) { + if (msg->tag == IUCV_SKB_CB(list_skb)->tag) { this = list_skb; break; } From faf1dc64e345ac4de5c4429df6ed492255ae2248 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Wed, 14 May 2014 14:39:21 +0530 Subject: [PATCH 089/178] ath9k_htc: Stop ANI before doing hw_reset During remain on channel request, ANI worker thread is not stopped before doing hw reset. This is causing kernel crash in hw_per_calibration. This change ensures that ANI is stopped before doing chip reset and it will be rescheduled later when the chip is configured back to home channel and having valid bss. Reported-by: David Herrmann Tested-by: David Herrmann Signed-off-by: Rajkumar Manoharan Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/htc_drv_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index f46cd0250e48..5627917c5ff7 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -95,8 +95,10 @@ static void ath9k_htc_vif_iter(void *data, u8 *mac, struct ieee80211_vif *vif) if ((vif->type == NL80211_IFTYPE_AP || vif->type == NL80211_IFTYPE_MESH_POINT) && - bss_conf->enable_beacon) + bss_conf->enable_beacon) { priv->reconfig_beacon = true; + priv->rearm_ani = true; + } if (bss_conf->assoc) { priv->rearm_ani = true; @@ -257,6 +259,7 @@ static int ath9k_htc_set_channel(struct ath9k_htc_priv *priv, ath9k_htc_ps_wakeup(priv); + ath9k_htc_stop_ani(priv); del_timer_sync(&priv->tx.cleanup_timer); ath9k_htc_tx_drain(priv); From e84d2f8d2ae33c8215429824e1ecf24cbca9645e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 May 2014 09:48:21 +0200 Subject: [PATCH 090/178] net: filter: s390: fix JIT address randomization This is the s390 variant of Alexei's JIT bug fix. (patch description below stolen from Alexei's patch) bpf_alloc_binary() adds 128 bytes of room to JITed program image and rounds it up to the nearest page size. If image size is close to page size (like 4000), it is rounded to two pages: round_up(4000 + 4 + 128) == 8192 then 'hole' is computed as 8192 - (4000 + 4) = 4188 If prandom_u32() % hole selects a number >= PAGE_SIZE - sizeof(*header) then kernel will crash during bpf_jit_free(): kernel BUG at arch/x86/mm/pageattr.c:887! Call Trace: [] change_page_attr_set_clr+0x135/0x460 [] ? _raw_spin_unlock_irq+0x30/0x50 [] set_memory_rw+0x2f/0x40 [] bpf_jit_free_deferred+0x2d/0x60 [] process_one_work+0x1d8/0x6a0 [] ? process_one_work+0x178/0x6a0 [] worker_thread+0x11c/0x370 since bpf_jit_free() does: unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; struct bpf_binary_header *header = (void *)addr; to compute start address of 'bpf_binary_header' and header->pages will pass junk to: set_memory_rw(addr, header->pages); Fix it by making sure that &header->image[prandom_u32() % hole] and &header are in the same page. Fixes: aa2d2c73c21f2 ("s390/bpf,jit: address randomize and write protect jit code") Reported-by: Alexei Starovoitov Cc: # v3.11+ Signed-off-by: Heiko Carstens Signed-off-by: David S. Miller --- arch/s390/net/bpf_jit_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 452d3ebd9d0f..e9f8fa9337fe 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -811,7 +811,7 @@ static struct bpf_binary_header *bpf_alloc_binary(unsigned int bpfsize, return NULL; memset(header, 0, sz); header->pages = sz / PAGE_SIZE; - hole = sz - (bpfsize + sizeof(*header)); + hole = min(sz - (bpfsize + sizeof(*header)), PAGE_SIZE - sizeof(*header)); /* Insert random number of illegal instructions before BPF code * and make sure the first instruction starts at an even address. */ From 3b084e99a3fabaeb0f9c65a0806cde30f0b2835e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 15 May 2014 17:18:26 +0200 Subject: [PATCH 091/178] netfilter: nf_tables: fix trace of matching non-terminal rule Add the corresponding trace if we have a full match in a non-terminal rule. Note that the traces will look slightly different than in x_tables since the log message after all expressions have been evaluated (contrary to x_tables, that emits it before the target action). This manifests in two differences in nf_tables wrt. x_tables: 1) The rule that enables the tracing is included in the trace. 2) If the rule emits some log message, that is shown before the trace log message. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 421c36ac5145..345acfb1720b 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -144,8 +144,10 @@ next_rule: switch (data[NFT_REG_VERDICT].verdict) { case NFT_BREAK: data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; - /* fall through */ + continue; case NFT_CONTINUE: + if (unlikely(pkt->skb->nf_trace)) + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); continue; } break; From 16a4142363b11952d3aa76ac78004502c0c2fe6e Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Thu, 24 Apr 2014 03:44:25 +0800 Subject: [PATCH 092/178] batman-adv: fix indirect hard_iface NULL dereference If hard_iface is NULL and goto out is made batadv_hardif_free_ref() doesn't check for NULL before dereferencing it to get to refcount. Introduced in cb1c92ec37fb70543d133a1fa7d9b54d6f8a1ecd ("batman-adv: add debugfs support to view multiif tables"). Reported-by: Sven Eckelmann Signed-off-by: Marek Lindner Acked-by: Antonio Quartulli Signed-off-by: Antonio Quartulli --- net/batman-adv/originator.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 1785da37b82c..6a484514cd3e 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -1079,7 +1079,8 @@ int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset) bat_priv->bat_algo_ops->bat_orig_print(bat_priv, seq, hard_iface); out: - batadv_hardif_free_ref(hard_iface); + if (hard_iface) + batadv_hardif_free_ref(hard_iface); return 0; } From be181015a189cd141398b761ba4e79d33fe69949 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Wed, 23 Apr 2014 14:05:16 +0200 Subject: [PATCH 093/178] batman-adv: fix reference counting imbalance while sending fragment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the new fragmentation code the batadv_frag_send_packet() function obtains a reference to the primary_if, but it does not release it upon return. This reference imbalance prevents the primary_if (and then the related netdevice) to be properly released on shut down. Fix this by releasing the primary_if in batadv_frag_send_packet(). Introduced by ee75ed88879af88558818a5c6609d85f60ff0df4 ("batman-adv: Fragment and send skbs larger than mtu") Cc: Martin Hundebøll Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner Acked-by: Martin Hundebøll --- net/batman-adv/fragmentation.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index bcc4bea632fa..f14e54a05691 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -418,12 +418,13 @@ bool batadv_frag_send_packet(struct sk_buff *skb, struct batadv_neigh_node *neigh_node) { struct batadv_priv *bat_priv; - struct batadv_hard_iface *primary_if; + struct batadv_hard_iface *primary_if = NULL; struct batadv_frag_packet frag_header; struct sk_buff *skb_fragment; unsigned mtu = neigh_node->if_incoming->net_dev->mtu; unsigned header_size = sizeof(frag_header); unsigned max_fragment_size, max_packet_size; + bool ret = false; /* To avoid merge and refragmentation at next-hops we never send * fragments larger than BATADV_FRAG_MAX_FRAG_SIZE @@ -483,7 +484,11 @@ bool batadv_frag_send_packet(struct sk_buff *skb, skb->len + ETH_HLEN); batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); - return true; + ret = true; + out_err: - return false; + if (primary_if) + batadv_hardif_free_ref(primary_if); + + return ret; } From 377fe0f968b30a1a714fab53a908061914f30e26 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Fri, 2 May 2014 01:35:13 +0200 Subject: [PATCH 094/178] batman-adv: increase orig refcount when storing ref in gw_node A pointer to the orig_node representing a bat-gateway is stored in the gw_node->orig_node member, but the refcount for such orig_node is never increased. This leads to memory faults when gw_node->orig_node is accessed and the originator has already been freed. Fix this by increasing the refcount on gw_node creation and decreasing it on gw_node free. Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/gateway_client.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index c835e137423b..90cff585b37d 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -42,8 +42,10 @@ static void batadv_gw_node_free_ref(struct batadv_gw_node *gw_node) { - if (atomic_dec_and_test(&gw_node->refcount)) + if (atomic_dec_and_test(&gw_node->refcount)) { + batadv_orig_node_free_ref(gw_node->orig_node); kfree_rcu(gw_node, rcu); + } } static struct batadv_gw_node * @@ -406,10 +408,15 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, if (gateway->bandwidth_down == 0) return; - gw_node = kzalloc(sizeof(*gw_node), GFP_ATOMIC); - if (!gw_node) + if (!atomic_inc_not_zero(&orig_node->refcount)) return; + gw_node = kzalloc(sizeof(*gw_node), GFP_ATOMIC); + if (!gw_node) { + batadv_orig_node_free_ref(orig_node); + return; + } + INIT_HLIST_NODE(&gw_node->list); gw_node->orig_node = orig_node; atomic_set(&gw_node->refcount, 1); From cc2f33860cea0e48ebec096130bd0f7c4bf6e0bc Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Sat, 29 Mar 2014 17:27:38 +0100 Subject: [PATCH 095/178] batman-adv: fix local TT check for outgoing arp requests in DAT Change introduced by 88e48d7b3340ef07b108eb8a8b3813dd093cc7f7 ("batman-adv: make DAT drop ARP requests targeting local clients") implements a check that prevents DAT from using the caching mechanism when the client that is supposed to provide a reply to an arp request is local. However change brought by be1db4f6615b5e6156c807ea8985171c215c2d57 ("batman-adv: make the Distributed ARP Table vlan aware") has not converted the above check into its vlan aware version thus making it useless when the local client is behind a vlan. Fix the behaviour by properly specifying the vlan when checking for a client being local or not. Reported-by: Simon Wunderlich Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/distributed-arp-table.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index b25fd64d727b..aa5d4946d0d7 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -940,8 +940,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, * additional DAT answer may trigger kernel warnings about * a packet coming from the wrong port. */ - if (batadv_is_my_client(bat_priv, dat_entry->mac_addr, - BATADV_NO_FLAGS)) { + if (batadv_is_my_client(bat_priv, dat_entry->mac_addr, vid)) { ret = true; goto out; } From 200b916f3575bdf11609cb447661b8d5957b0bbf Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 12 May 2014 15:11:20 -0700 Subject: [PATCH 096/178] rtnetlink: wait for unregistering devices in rtnl_link_unregister() From: Cong Wang commit 50624c934db18ab90 (net: Delay default_device_exit_batch until no devices are unregistering) introduced rtnl_lock_unregistering() for default_device_exit_batch(). Same race could happen we when rmmod a driver which calls rtnl_link_unregister() as we call dev->destructor without rtnl lock. For long term, I think we should clean up the mess of netdev_run_todo() and net namespce exit code. Cc: Eric W. Biederman Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 5 +++++ net/core/dev.c | 2 +- net/core/net_namespace.c | 2 +- net/core/rtnetlink.c | 33 ++++++++++++++++++++++++++++++++- 4 files changed, 39 insertions(+), 3 deletions(-) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 8e3e66ac0a52..953937ea5233 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -4,6 +4,7 @@ #include #include +#include #include extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); @@ -22,6 +23,10 @@ extern void rtnl_lock(void); extern void rtnl_unlock(void); extern int rtnl_trylock(void); extern int rtnl_is_locked(void); + +extern wait_queue_head_t netdev_unregistering_wq; +extern struct mutex net_mutex; + #ifdef CONFIG_PROVE_LOCKING extern int lockdep_rtnl_is_held(void); #else diff --git a/net/core/dev.c b/net/core/dev.c index c619b8641337..6da649bde4f7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5541,7 +5541,7 @@ static int dev_new_index(struct net *net) /* Delayed registration/unregisteration */ static LIST_HEAD(net_todo_list); -static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); +DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); static void net_set_todo(struct net_device *dev) { diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 81d3a9a08453..7c8ffd974961 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -24,7 +24,7 @@ static LIST_HEAD(pernet_list); static struct list_head *first_device = &pernet_list; -static DEFINE_MUTEX(net_mutex); +DEFINE_MUTEX(net_mutex); LIST_HEAD(net_namespace_list); EXPORT_SYMBOL_GPL(net_namespace_list); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9837bebf93ce..2d8d8fcfa060 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -353,15 +353,46 @@ void __rtnl_link_unregister(struct rtnl_link_ops *ops) } EXPORT_SYMBOL_GPL(__rtnl_link_unregister); +/* Return with the rtnl_lock held when there are no network + * devices unregistering in any network namespace. + */ +static void rtnl_lock_unregistering_all(void) +{ + struct net *net; + bool unregistering; + DEFINE_WAIT(wait); + + for (;;) { + prepare_to_wait(&netdev_unregistering_wq, &wait, + TASK_UNINTERRUPTIBLE); + unregistering = false; + rtnl_lock(); + for_each_net(net) { + if (net->dev_unreg_count > 0) { + unregistering = true; + break; + } + } + if (!unregistering) + break; + __rtnl_unlock(); + schedule(); + } + finish_wait(&netdev_unregistering_wq, &wait); +} + /** * rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. * @ops: struct rtnl_link_ops * to unregister */ void rtnl_link_unregister(struct rtnl_link_ops *ops) { - rtnl_lock(); + /* Close the race with cleanup_net() */ + mutex_lock(&net_mutex); + rtnl_lock_unregistering_all(); __rtnl_link_unregister(ops); rtnl_unlock(); + mutex_unlock(&net_mutex); } EXPORT_SYMBOL_GPL(rtnl_link_unregister); From 5513a510fad73594e29e1e48e760e0a644a8a4f3 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 14 May 2014 21:57:26 -0400 Subject: [PATCH 097/178] nfsd4: fix corruption on setting an ACL. As of 06f9cc12caa862f5bc86ebdb4f77568a4bef0167 "nfsd4: don't create unnecessary mask acl", any non-trivial ACL will be left with an unitialized entry, and a trivial ACL may write one entry beyond what's allocated. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4acl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index b6f46013dddf..f66c66b9f182 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -590,7 +590,7 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) add_to_mask(state, &state->groups->aces[i].perms); } - if (!state->users->n && !state->groups->n) { + if (state->users->n || state->groups->n) { pace++; pace->e_tag = ACL_MASK; low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags); From 898305806ad56ae11dc2c80931062e6a2c7bba48 Mon Sep 17 00:00:00 2001 From: Vince Bridgers Date: Wed, 14 May 2014 14:38:36 -0500 Subject: [PATCH 098/178] Altera TSE: Fix sparse errors and warnings This patch fixes the many sparse errors and warnings contained in the initial submission of the Altera Triple Speed Ethernet driver, and a few minor cppcheck warnings. Changes are tested on ARM and NIOS2 example designs, and compile tested against multiple architectures. Typical issues addressed were as follows: altera_tse_ethtool.c:136:19: warning: incorrect type in argument 1 (different address spaces) altera_tse_ethtool.c:136:19: expected void const volatile [noderef] *addr altera_tse_ethtool.c:136:19: got unsigned int * ... altera_sgdma.c:129:31: warning: cast removes address space of expression Signed-off-by: Vince Bridgers Signed-off-by: David S. Miller --- drivers/net/ethernet/altera/Makefile | 1 + drivers/net/ethernet/altera/altera_msgdma.c | 108 +++++------ drivers/net/ethernet/altera/altera_msgdmahw.h | 13 +- drivers/net/ethernet/altera/altera_sgdma.c | 181 +++++++++--------- drivers/net/ethernet/altera/altera_sgdmahw.h | 26 +-- drivers/net/ethernet/altera/altera_tse.h | 47 +++++ .../net/ethernet/altera/altera_tse_ethtool.c | 108 +++++++---- drivers/net/ethernet/altera/altera_tse_main.c | 128 +++++++------ drivers/net/ethernet/altera/altera_utils.c | 20 +- drivers/net/ethernet/altera/altera_utils.h | 8 +- 10 files changed, 366 insertions(+), 274 deletions(-) diff --git a/drivers/net/ethernet/altera/Makefile b/drivers/net/ethernet/altera/Makefile index d4a187e45369..3eff2fd3997e 100644 --- a/drivers/net/ethernet/altera/Makefile +++ b/drivers/net/ethernet/altera/Makefile @@ -5,3 +5,4 @@ obj-$(CONFIG_ALTERA_TSE) += altera_tse.o altera_tse-objs := altera_tse_main.o altera_tse_ethtool.o \ altera_msgdma.o altera_sgdma.o altera_utils.o +ccflags-y += -D__CHECK_ENDIAN__ diff --git a/drivers/net/ethernet/altera/altera_msgdma.c b/drivers/net/ethernet/altera/altera_msgdma.c index 4d1f2fdd5c32..0fb986ba3290 100644 --- a/drivers/net/ethernet/altera/altera_msgdma.c +++ b/drivers/net/ethernet/altera/altera_msgdma.c @@ -37,18 +37,16 @@ void msgdma_start_rxdma(struct altera_tse_private *priv) void msgdma_reset(struct altera_tse_private *priv) { int counter; - struct msgdma_csr *txcsr = - (struct msgdma_csr *)priv->tx_dma_csr; - struct msgdma_csr *rxcsr = - (struct msgdma_csr *)priv->rx_dma_csr; /* Reset Rx mSGDMA */ - iowrite32(MSGDMA_CSR_STAT_MASK, &rxcsr->status); - iowrite32(MSGDMA_CSR_CTL_RESET, &rxcsr->control); + csrwr32(MSGDMA_CSR_STAT_MASK, priv->rx_dma_csr, + msgdma_csroffs(status)); + csrwr32(MSGDMA_CSR_CTL_RESET, priv->rx_dma_csr, + msgdma_csroffs(control)); counter = 0; while (counter++ < ALTERA_TSE_SW_RESET_WATCHDOG_CNTR) { - if (tse_bit_is_clear(&rxcsr->status, + if (tse_bit_is_clear(priv->rx_dma_csr, msgdma_csroffs(status), MSGDMA_CSR_STAT_RESETTING)) break; udelay(1); @@ -59,15 +57,18 @@ void msgdma_reset(struct altera_tse_private *priv) "TSE Rx mSGDMA resetting bit never cleared!\n"); /* clear all status bits */ - iowrite32(MSGDMA_CSR_STAT_MASK, &rxcsr->status); + csrwr32(MSGDMA_CSR_STAT_MASK, priv->rx_dma_csr, msgdma_csroffs(status)); /* Reset Tx mSGDMA */ - iowrite32(MSGDMA_CSR_STAT_MASK, &txcsr->status); - iowrite32(MSGDMA_CSR_CTL_RESET, &txcsr->control); + csrwr32(MSGDMA_CSR_STAT_MASK, priv->tx_dma_csr, + msgdma_csroffs(status)); + + csrwr32(MSGDMA_CSR_CTL_RESET, priv->tx_dma_csr, + msgdma_csroffs(control)); counter = 0; while (counter++ < ALTERA_TSE_SW_RESET_WATCHDOG_CNTR) { - if (tse_bit_is_clear(&txcsr->status, + if (tse_bit_is_clear(priv->tx_dma_csr, msgdma_csroffs(status), MSGDMA_CSR_STAT_RESETTING)) break; udelay(1); @@ -78,58 +79,58 @@ void msgdma_reset(struct altera_tse_private *priv) "TSE Tx mSGDMA resetting bit never cleared!\n"); /* clear all status bits */ - iowrite32(MSGDMA_CSR_STAT_MASK, &txcsr->status); + csrwr32(MSGDMA_CSR_STAT_MASK, priv->tx_dma_csr, msgdma_csroffs(status)); } void msgdma_disable_rxirq(struct altera_tse_private *priv) { - struct msgdma_csr *csr = priv->rx_dma_csr; - tse_clear_bit(&csr->control, MSGDMA_CSR_CTL_GLOBAL_INTR); + tse_clear_bit(priv->rx_dma_csr, msgdma_csroffs(control), + MSGDMA_CSR_CTL_GLOBAL_INTR); } void msgdma_enable_rxirq(struct altera_tse_private *priv) { - struct msgdma_csr *csr = priv->rx_dma_csr; - tse_set_bit(&csr->control, MSGDMA_CSR_CTL_GLOBAL_INTR); + tse_set_bit(priv->rx_dma_csr, msgdma_csroffs(control), + MSGDMA_CSR_CTL_GLOBAL_INTR); } void msgdma_disable_txirq(struct altera_tse_private *priv) { - struct msgdma_csr *csr = priv->tx_dma_csr; - tse_clear_bit(&csr->control, MSGDMA_CSR_CTL_GLOBAL_INTR); + tse_clear_bit(priv->tx_dma_csr, msgdma_csroffs(control), + MSGDMA_CSR_CTL_GLOBAL_INTR); } void msgdma_enable_txirq(struct altera_tse_private *priv) { - struct msgdma_csr *csr = priv->tx_dma_csr; - tse_set_bit(&csr->control, MSGDMA_CSR_CTL_GLOBAL_INTR); + tse_set_bit(priv->tx_dma_csr, msgdma_csroffs(control), + MSGDMA_CSR_CTL_GLOBAL_INTR); } void msgdma_clear_rxirq(struct altera_tse_private *priv) { - struct msgdma_csr *csr = priv->rx_dma_csr; - iowrite32(MSGDMA_CSR_STAT_IRQ, &csr->status); + csrwr32(MSGDMA_CSR_STAT_IRQ, priv->rx_dma_csr, msgdma_csroffs(status)); } void msgdma_clear_txirq(struct altera_tse_private *priv) { - struct msgdma_csr *csr = priv->tx_dma_csr; - iowrite32(MSGDMA_CSR_STAT_IRQ, &csr->status); + csrwr32(MSGDMA_CSR_STAT_IRQ, priv->tx_dma_csr, msgdma_csroffs(status)); } /* return 0 to indicate transmit is pending */ int msgdma_tx_buffer(struct altera_tse_private *priv, struct tse_buffer *buffer) { - struct msgdma_extended_desc *desc = priv->tx_dma_desc; - - iowrite32(lower_32_bits(buffer->dma_addr), &desc->read_addr_lo); - iowrite32(upper_32_bits(buffer->dma_addr), &desc->read_addr_hi); - iowrite32(0, &desc->write_addr_lo); - iowrite32(0, &desc->write_addr_hi); - iowrite32(buffer->len, &desc->len); - iowrite32(0, &desc->burst_seq_num); - iowrite32(MSGDMA_DESC_TX_STRIDE, &desc->stride); - iowrite32(MSGDMA_DESC_CTL_TX_SINGLE, &desc->control); + csrwr32(lower_32_bits(buffer->dma_addr), priv->tx_dma_desc, + msgdma_descroffs(read_addr_lo)); + csrwr32(upper_32_bits(buffer->dma_addr), priv->tx_dma_desc, + msgdma_descroffs(read_addr_hi)); + csrwr32(0, priv->tx_dma_desc, msgdma_descroffs(write_addr_lo)); + csrwr32(0, priv->tx_dma_desc, msgdma_descroffs(write_addr_hi)); + csrwr32(buffer->len, priv->tx_dma_desc, msgdma_descroffs(len)); + csrwr32(0, priv->tx_dma_desc, msgdma_descroffs(burst_seq_num)); + csrwr32(MSGDMA_DESC_TX_STRIDE, priv->tx_dma_desc, + msgdma_descroffs(stride)); + csrwr32(MSGDMA_DESC_CTL_TX_SINGLE, priv->tx_dma_desc, + msgdma_descroffs(control)); return 0; } @@ -138,17 +139,16 @@ u32 msgdma_tx_completions(struct altera_tse_private *priv) u32 ready = 0; u32 inuse; u32 status; - struct msgdma_csr *txcsr = - (struct msgdma_csr *)priv->tx_dma_csr; /* Get number of sent descriptors */ - inuse = ioread32(&txcsr->rw_fill_level) & 0xffff; + inuse = csrrd32(priv->tx_dma_csr, msgdma_csroffs(rw_fill_level)) + & 0xffff; if (inuse) { /* Tx FIFO is not empty */ ready = priv->tx_prod - priv->tx_cons - inuse - 1; } else { /* Check for buffered last packet */ - status = ioread32(&txcsr->status); + status = csrrd32(priv->tx_dma_csr, msgdma_csroffs(status)); if (status & MSGDMA_CSR_STAT_BUSY) ready = priv->tx_prod - priv->tx_cons - 1; else @@ -162,7 +162,6 @@ u32 msgdma_tx_completions(struct altera_tse_private *priv) void msgdma_add_rx_desc(struct altera_tse_private *priv, struct tse_buffer *rxbuffer) { - struct msgdma_extended_desc *desc = priv->rx_dma_desc; u32 len = priv->rx_dma_buf_sz; dma_addr_t dma_addr = rxbuffer->dma_addr; u32 control = (MSGDMA_DESC_CTL_END_ON_EOP @@ -172,14 +171,16 @@ void msgdma_add_rx_desc(struct altera_tse_private *priv, | MSGDMA_DESC_CTL_TR_ERR_IRQ | MSGDMA_DESC_CTL_GO); - iowrite32(0, &desc->read_addr_lo); - iowrite32(0, &desc->read_addr_hi); - iowrite32(lower_32_bits(dma_addr), &desc->write_addr_lo); - iowrite32(upper_32_bits(dma_addr), &desc->write_addr_hi); - iowrite32(len, &desc->len); - iowrite32(0, &desc->burst_seq_num); - iowrite32(0x00010001, &desc->stride); - iowrite32(control, &desc->control); + csrwr32(0, priv->rx_dma_desc, msgdma_descroffs(read_addr_lo)); + csrwr32(0, priv->rx_dma_desc, msgdma_descroffs(read_addr_hi)); + csrwr32(lower_32_bits(dma_addr), priv->rx_dma_desc, + msgdma_descroffs(write_addr_lo)); + csrwr32(upper_32_bits(dma_addr), priv->rx_dma_desc, + msgdma_descroffs(write_addr_hi)); + csrwr32(len, priv->rx_dma_desc, msgdma_descroffs(len)); + csrwr32(0, priv->rx_dma_desc, msgdma_descroffs(burst_seq_num)); + csrwr32(0x00010001, priv->rx_dma_desc, msgdma_descroffs(stride)); + csrwr32(control, priv->rx_dma_desc, msgdma_descroffs(control)); } /* status is returned on upper 16 bits, @@ -190,14 +191,13 @@ u32 msgdma_rx_status(struct altera_tse_private *priv) u32 rxstatus = 0; u32 pktlength; u32 pktstatus; - struct msgdma_csr *rxcsr = - (struct msgdma_csr *)priv->rx_dma_csr; - struct msgdma_response *rxresp = - (struct msgdma_response *)priv->rx_dma_resp; - if (ioread32(&rxcsr->resp_fill_level) & 0xffff) { - pktlength = ioread32(&rxresp->bytes_transferred); - pktstatus = ioread32(&rxresp->status); + if (csrrd32(priv->rx_dma_csr, msgdma_csroffs(resp_fill_level)) + & 0xffff) { + pktlength = csrrd32(priv->rx_dma_resp, + msgdma_respoffs(bytes_transferred)); + pktstatus = csrrd32(priv->rx_dma_resp, + msgdma_respoffs(status)); rxstatus = pktstatus; rxstatus = rxstatus << 16; rxstatus |= (pktlength & 0xffff); diff --git a/drivers/net/ethernet/altera/altera_msgdmahw.h b/drivers/net/ethernet/altera/altera_msgdmahw.h index d7b59ba4019c..e335626e1b6b 100644 --- a/drivers/net/ethernet/altera/altera_msgdmahw.h +++ b/drivers/net/ethernet/altera/altera_msgdmahw.h @@ -17,15 +17,6 @@ #ifndef __ALTERA_MSGDMAHW_H__ #define __ALTERA_MSGDMAHW_H__ -/* mSGDMA standard descriptor format - */ -struct msgdma_desc { - u32 read_addr; /* data buffer source address */ - u32 write_addr; /* data buffer destination address */ - u32 len; /* the number of bytes to transfer per descriptor */ - u32 control; /* characteristics of the transfer */ -}; - /* mSGDMA extended descriptor format */ struct msgdma_extended_desc { @@ -159,6 +150,10 @@ struct msgdma_response { u32 status; }; +#define msgdma_respoffs(a) (offsetof(struct msgdma_response, a)) +#define msgdma_csroffs(a) (offsetof(struct msgdma_csr, a)) +#define msgdma_descroffs(a) (offsetof(struct msgdma_extended_desc, a)) + /* mSGDMA response register bit definitions */ #define MSGDMA_RESP_EARLY_TERM BIT(8) diff --git a/drivers/net/ethernet/altera/altera_sgdma.c b/drivers/net/ethernet/altera/altera_sgdma.c index 9ce8630692b6..99cc56f451cf 100644 --- a/drivers/net/ethernet/altera/altera_sgdma.c +++ b/drivers/net/ethernet/altera/altera_sgdma.c @@ -20,8 +20,8 @@ #include "altera_sgdmahw.h" #include "altera_sgdma.h" -static void sgdma_setup_descrip(struct sgdma_descrip *desc, - struct sgdma_descrip *ndesc, +static void sgdma_setup_descrip(struct sgdma_descrip __iomem *desc, + struct sgdma_descrip __iomem *ndesc, dma_addr_t ndesc_phys, dma_addr_t raddr, dma_addr_t waddr, @@ -31,17 +31,17 @@ static void sgdma_setup_descrip(struct sgdma_descrip *desc, int wfixed); static int sgdma_async_write(struct altera_tse_private *priv, - struct sgdma_descrip *desc); + struct sgdma_descrip __iomem *desc); static int sgdma_async_read(struct altera_tse_private *priv); static dma_addr_t sgdma_txphysaddr(struct altera_tse_private *priv, - struct sgdma_descrip *desc); + struct sgdma_descrip __iomem *desc); static dma_addr_t sgdma_rxphysaddr(struct altera_tse_private *priv, - struct sgdma_descrip *desc); + struct sgdma_descrip __iomem *desc); static int sgdma_txbusy(struct altera_tse_private *priv); @@ -79,7 +79,8 @@ int sgdma_initialize(struct altera_tse_private *priv) priv->rxdescphys = (dma_addr_t) 0; priv->txdescphys = (dma_addr_t) 0; - priv->rxdescphys = dma_map_single(priv->device, priv->rx_dma_desc, + priv->rxdescphys = dma_map_single(priv->device, + (void __force *)priv->rx_dma_desc, priv->rxdescmem, DMA_BIDIRECTIONAL); if (dma_mapping_error(priv->device, priv->rxdescphys)) { @@ -88,7 +89,8 @@ int sgdma_initialize(struct altera_tse_private *priv) return -EINVAL; } - priv->txdescphys = dma_map_single(priv->device, priv->tx_dma_desc, + priv->txdescphys = dma_map_single(priv->device, + (void __force *)priv->tx_dma_desc, priv->txdescmem, DMA_TO_DEVICE); if (dma_mapping_error(priv->device, priv->txdescphys)) { @@ -98,8 +100,8 @@ int sgdma_initialize(struct altera_tse_private *priv) } /* Initialize descriptor memory to all 0's, sync memory to cache */ - memset(priv->tx_dma_desc, 0, priv->txdescmem); - memset(priv->rx_dma_desc, 0, priv->rxdescmem); + memset_io(priv->tx_dma_desc, 0, priv->txdescmem); + memset_io(priv->rx_dma_desc, 0, priv->rxdescmem); dma_sync_single_for_device(priv->device, priv->txdescphys, priv->txdescmem, DMA_TO_DEVICE); @@ -126,22 +128,15 @@ void sgdma_uninitialize(struct altera_tse_private *priv) */ void sgdma_reset(struct altera_tse_private *priv) { - u32 *ptxdescripmem = (u32 *)priv->tx_dma_desc; - u32 txdescriplen = priv->txdescmem; - u32 *prxdescripmem = (u32 *)priv->rx_dma_desc; - u32 rxdescriplen = priv->rxdescmem; - struct sgdma_csr *ptxsgdma = (struct sgdma_csr *)priv->tx_dma_csr; - struct sgdma_csr *prxsgdma = (struct sgdma_csr *)priv->rx_dma_csr; - /* Initialize descriptor memory to 0 */ - memset(ptxdescripmem, 0, txdescriplen); - memset(prxdescripmem, 0, rxdescriplen); + memset_io(priv->tx_dma_desc, 0, priv->txdescmem); + memset_io(priv->rx_dma_desc, 0, priv->rxdescmem); - iowrite32(SGDMA_CTRLREG_RESET, &ptxsgdma->control); - iowrite32(0, &ptxsgdma->control); + csrwr32(SGDMA_CTRLREG_RESET, priv->tx_dma_csr, sgdma_csroffs(control)); + csrwr32(0, priv->tx_dma_csr, sgdma_csroffs(control)); - iowrite32(SGDMA_CTRLREG_RESET, &prxsgdma->control); - iowrite32(0, &prxsgdma->control); + csrwr32(SGDMA_CTRLREG_RESET, priv->rx_dma_csr, sgdma_csroffs(control)); + csrwr32(0, priv->rx_dma_csr, sgdma_csroffs(control)); } /* For SGDMA, interrupts remain enabled after initially enabling, @@ -167,14 +162,14 @@ void sgdma_disable_txirq(struct altera_tse_private *priv) void sgdma_clear_rxirq(struct altera_tse_private *priv) { - struct sgdma_csr *csr = (struct sgdma_csr *)priv->rx_dma_csr; - tse_set_bit(&csr->control, SGDMA_CTRLREG_CLRINT); + tse_set_bit(priv->rx_dma_csr, sgdma_csroffs(control), + SGDMA_CTRLREG_CLRINT); } void sgdma_clear_txirq(struct altera_tse_private *priv) { - struct sgdma_csr *csr = (struct sgdma_csr *)priv->tx_dma_csr; - tse_set_bit(&csr->control, SGDMA_CTRLREG_CLRINT); + tse_set_bit(priv->tx_dma_csr, sgdma_csroffs(control), + SGDMA_CTRLREG_CLRINT); } /* transmits buffer through SGDMA. Returns number of buffers @@ -184,12 +179,11 @@ void sgdma_clear_txirq(struct altera_tse_private *priv) */ int sgdma_tx_buffer(struct altera_tse_private *priv, struct tse_buffer *buffer) { - int pktstx = 0; - struct sgdma_descrip *descbase = - (struct sgdma_descrip *)priv->tx_dma_desc; + struct sgdma_descrip __iomem *descbase = + (struct sgdma_descrip __iomem *)priv->tx_dma_desc; - struct sgdma_descrip *cdesc = &descbase[0]; - struct sgdma_descrip *ndesc = &descbase[1]; + struct sgdma_descrip __iomem *cdesc = &descbase[0]; + struct sgdma_descrip __iomem *ndesc = &descbase[1]; /* wait 'til the tx sgdma is ready for the next transmit request */ if (sgdma_txbusy(priv)) @@ -205,7 +199,7 @@ int sgdma_tx_buffer(struct altera_tse_private *priv, struct tse_buffer *buffer) 0, /* read fixed */ SGDMA_CONTROL_WR_FIXED); /* Generate SOP */ - pktstx = sgdma_async_write(priv, cdesc); + sgdma_async_write(priv, cdesc); /* enqueue the request to the pending transmit queue */ queue_tx(priv, buffer); @@ -219,10 +213,10 @@ int sgdma_tx_buffer(struct altera_tse_private *priv, struct tse_buffer *buffer) u32 sgdma_tx_completions(struct altera_tse_private *priv) { u32 ready = 0; - struct sgdma_descrip *desc = (struct sgdma_descrip *)priv->tx_dma_desc; if (!sgdma_txbusy(priv) && - ((desc->control & SGDMA_CONTROL_HW_OWNED) == 0) && + ((csrrd8(priv->tx_dma_desc, sgdma_descroffs(control)) + & SGDMA_CONTROL_HW_OWNED) == 0) && (dequeue_tx(priv))) { ready = 1; } @@ -246,32 +240,31 @@ void sgdma_add_rx_desc(struct altera_tse_private *priv, */ u32 sgdma_rx_status(struct altera_tse_private *priv) { - struct sgdma_csr *csr = (struct sgdma_csr *)priv->rx_dma_csr; - struct sgdma_descrip *base = (struct sgdma_descrip *)priv->rx_dma_desc; - struct sgdma_descrip *desc = NULL; - int pktsrx; - unsigned int rxstatus = 0; - unsigned int pktlength = 0; - unsigned int pktstatus = 0; + struct sgdma_descrip __iomem *base = + (struct sgdma_descrip __iomem *)priv->rx_dma_desc; + struct sgdma_descrip __iomem *desc = NULL; struct tse_buffer *rxbuffer = NULL; + unsigned int rxstatus = 0; - u32 sts = ioread32(&csr->status); + u32 sts = csrrd32(priv->rx_dma_csr, sgdma_csroffs(status)); desc = &base[0]; if (sts & SGDMA_STSREG_EOP) { + unsigned int pktlength = 0; + unsigned int pktstatus = 0; dma_sync_single_for_cpu(priv->device, priv->rxdescphys, priv->sgdmadesclen, DMA_FROM_DEVICE); - pktlength = desc->bytes_xferred; - pktstatus = desc->status & 0x3f; - rxstatus = pktstatus; + pktlength = csrrd16(desc, sgdma_descroffs(bytes_xferred)); + pktstatus = csrrd8(desc, sgdma_descroffs(status)); + rxstatus = pktstatus & ~SGDMA_STATUS_EOP; rxstatus = rxstatus << 16; rxstatus |= (pktlength & 0xffff); if (rxstatus) { - desc->status = 0; + csrwr8(0, desc, sgdma_descroffs(status)); rxbuffer = dequeue_rx(priv); if (rxbuffer == NULL) @@ -279,12 +272,12 @@ u32 sgdma_rx_status(struct altera_tse_private *priv) "sgdma rx and rx queue empty!\n"); /* Clear control */ - iowrite32(0, &csr->control); + csrwr32(0, priv->rx_dma_csr, sgdma_csroffs(control)); /* clear status */ - iowrite32(0xf, &csr->status); + csrwr32(0xf, priv->rx_dma_csr, sgdma_csroffs(status)); /* kick the rx sgdma after reaping this descriptor */ - pktsrx = sgdma_async_read(priv); + sgdma_async_read(priv); } else { /* If the SGDMA indicated an end of packet on recv, @@ -298,10 +291,11 @@ u32 sgdma_rx_status(struct altera_tse_private *priv) */ netdev_err(priv->dev, "SGDMA RX Error Info: %x, %x, %x\n", - sts, desc->status, rxstatus); + sts, csrrd8(desc, sgdma_descroffs(status)), + rxstatus); } } else if (sts == 0) { - pktsrx = sgdma_async_read(priv); + sgdma_async_read(priv); } return rxstatus; @@ -309,8 +303,8 @@ u32 sgdma_rx_status(struct altera_tse_private *priv) /* Private functions */ -static void sgdma_setup_descrip(struct sgdma_descrip *desc, - struct sgdma_descrip *ndesc, +static void sgdma_setup_descrip(struct sgdma_descrip __iomem *desc, + struct sgdma_descrip __iomem *ndesc, dma_addr_t ndesc_phys, dma_addr_t raddr, dma_addr_t waddr, @@ -320,27 +314,30 @@ static void sgdma_setup_descrip(struct sgdma_descrip *desc, int wfixed) { /* Clear the next descriptor as not owned by hardware */ - u32 ctrl = ndesc->control; - ctrl &= ~SGDMA_CONTROL_HW_OWNED; - ndesc->control = ctrl; - ctrl = 0; + u32 ctrl = csrrd8(ndesc, sgdma_descroffs(control)); + ctrl &= ~SGDMA_CONTROL_HW_OWNED; + csrwr8(ctrl, ndesc, sgdma_descroffs(control)); + ctrl = SGDMA_CONTROL_HW_OWNED; ctrl |= generate_eop; ctrl |= rfixed; ctrl |= wfixed; /* Channel is implicitly zero, initialized to 0 by default */ + csrwr32(lower_32_bits(raddr), desc, sgdma_descroffs(raddr)); + csrwr32(lower_32_bits(waddr), desc, sgdma_descroffs(waddr)); - desc->raddr = raddr; - desc->waddr = waddr; - desc->next = lower_32_bits(ndesc_phys); - desc->control = ctrl; - desc->status = 0; - desc->rburst = 0; - desc->wburst = 0; - desc->bytes = length; - desc->bytes_xferred = 0; + csrwr32(0, desc, sgdma_descroffs(pad1)); + csrwr32(0, desc, sgdma_descroffs(pad2)); + csrwr32(lower_32_bits(ndesc_phys), desc, sgdma_descroffs(next)); + + csrwr8(ctrl, desc, sgdma_descroffs(control)); + csrwr8(0, desc, sgdma_descroffs(status)); + csrwr8(0, desc, sgdma_descroffs(wburst)); + csrwr8(0, desc, sgdma_descroffs(rburst)); + csrwr16(length, desc, sgdma_descroffs(bytes)); + csrwr16(0, desc, sgdma_descroffs(bytes_xferred)); } /* If hardware is busy, don't restart async read. @@ -351,12 +348,11 @@ static void sgdma_setup_descrip(struct sgdma_descrip *desc, */ static int sgdma_async_read(struct altera_tse_private *priv) { - struct sgdma_csr *csr = (struct sgdma_csr *)priv->rx_dma_csr; - struct sgdma_descrip *descbase = - (struct sgdma_descrip *)priv->rx_dma_desc; + struct sgdma_descrip __iomem *descbase = + (struct sgdma_descrip __iomem *)priv->rx_dma_desc; - struct sgdma_descrip *cdesc = &descbase[0]; - struct sgdma_descrip *ndesc = &descbase[1]; + struct sgdma_descrip __iomem *cdesc = &descbase[0]; + struct sgdma_descrip __iomem *ndesc = &descbase[1]; struct tse_buffer *rxbuffer = NULL; @@ -382,11 +378,13 @@ static int sgdma_async_read(struct altera_tse_private *priv) priv->sgdmadesclen, DMA_TO_DEVICE); - iowrite32(lower_32_bits(sgdma_rxphysaddr(priv, cdesc)), - &csr->next_descrip); + csrwr32(lower_32_bits(sgdma_rxphysaddr(priv, cdesc)), + priv->rx_dma_csr, + sgdma_csroffs(next_descrip)); - iowrite32((priv->rxctrlreg | SGDMA_CTRLREG_START), - &csr->control); + csrwr32((priv->rxctrlreg | SGDMA_CTRLREG_START), + priv->rx_dma_csr, + sgdma_csroffs(control)); return 1; } @@ -395,32 +393,32 @@ static int sgdma_async_read(struct altera_tse_private *priv) } static int sgdma_async_write(struct altera_tse_private *priv, - struct sgdma_descrip *desc) + struct sgdma_descrip __iomem *desc) { - struct sgdma_csr *csr = (struct sgdma_csr *)priv->tx_dma_csr; - if (sgdma_txbusy(priv)) return 0; /* clear control and status */ - iowrite32(0, &csr->control); - iowrite32(0x1f, &csr->status); + csrwr32(0, priv->tx_dma_csr, sgdma_csroffs(control)); + csrwr32(0x1f, priv->tx_dma_csr, sgdma_csroffs(status)); dma_sync_single_for_device(priv->device, priv->txdescphys, priv->sgdmadesclen, DMA_TO_DEVICE); - iowrite32(lower_32_bits(sgdma_txphysaddr(priv, desc)), - &csr->next_descrip); + csrwr32(lower_32_bits(sgdma_txphysaddr(priv, desc)), + priv->tx_dma_csr, + sgdma_csroffs(next_descrip)); - iowrite32((priv->txctrlreg | SGDMA_CTRLREG_START), - &csr->control); + csrwr32((priv->txctrlreg | SGDMA_CTRLREG_START), + priv->tx_dma_csr, + sgdma_csroffs(control)); return 1; } static dma_addr_t sgdma_txphysaddr(struct altera_tse_private *priv, - struct sgdma_descrip *desc) + struct sgdma_descrip __iomem *desc) { dma_addr_t paddr = priv->txdescmem_busaddr; uintptr_t offs = (uintptr_t)desc - (uintptr_t)priv->tx_dma_desc; @@ -429,7 +427,7 @@ sgdma_txphysaddr(struct altera_tse_private *priv, static dma_addr_t sgdma_rxphysaddr(struct altera_tse_private *priv, - struct sgdma_descrip *desc) + struct sgdma_descrip __iomem *desc) { dma_addr_t paddr = priv->rxdescmem_busaddr; uintptr_t offs = (uintptr_t)desc - (uintptr_t)priv->rx_dma_desc; @@ -518,8 +516,8 @@ queue_rx_peekhead(struct altera_tse_private *priv) */ static int sgdma_rxbusy(struct altera_tse_private *priv) { - struct sgdma_csr *csr = (struct sgdma_csr *)priv->rx_dma_csr; - return ioread32(&csr->status) & SGDMA_STSREG_BUSY; + return csrrd32(priv->rx_dma_csr, sgdma_csroffs(status)) + & SGDMA_STSREG_BUSY; } /* waits for the tx sgdma to finish it's current operation, returns 0 @@ -528,13 +526,14 @@ static int sgdma_rxbusy(struct altera_tse_private *priv) static int sgdma_txbusy(struct altera_tse_private *priv) { int delay = 0; - struct sgdma_csr *csr = (struct sgdma_csr *)priv->tx_dma_csr; /* if DMA is busy, wait for current transactino to finish */ - while ((ioread32(&csr->status) & SGDMA_STSREG_BUSY) && (delay++ < 100)) + while ((csrrd32(priv->tx_dma_csr, sgdma_csroffs(status)) + & SGDMA_STSREG_BUSY) && (delay++ < 100)) udelay(1); - if (ioread32(&csr->status) & SGDMA_STSREG_BUSY) { + if (csrrd32(priv->tx_dma_csr, sgdma_csroffs(status)) + & SGDMA_STSREG_BUSY) { netdev_err(priv->dev, "timeout waiting for tx dma\n"); return 1; } diff --git a/drivers/net/ethernet/altera/altera_sgdmahw.h b/drivers/net/ethernet/altera/altera_sgdmahw.h index ba3334f35383..85bc33b218d9 100644 --- a/drivers/net/ethernet/altera/altera_sgdmahw.h +++ b/drivers/net/ethernet/altera/altera_sgdmahw.h @@ -19,16 +19,16 @@ /* SGDMA descriptor structure */ struct sgdma_descrip { - unsigned int raddr; /* address of data to be read */ - unsigned int pad1; - unsigned int waddr; - unsigned int pad2; - unsigned int next; - unsigned int pad3; - unsigned short bytes; - unsigned char rburst; - unsigned char wburst; - unsigned short bytes_xferred; /* 16 bits, bytes xferred */ + u32 raddr; /* address of data to be read */ + u32 pad1; + u32 waddr; + u32 pad2; + u32 next; + u32 pad3; + u16 bytes; + u8 rburst; + u8 wburst; + u16 bytes_xferred; /* 16 bits, bytes xferred */ /* bit 0: error * bit 1: length error @@ -39,7 +39,7 @@ struct sgdma_descrip { * bit 6: reserved * bit 7: status eop for recv case */ - unsigned char status; + u8 status; /* bit 0: eop * bit 1: read_fixed @@ -47,7 +47,7 @@ struct sgdma_descrip { * bits 3,4,5,6: Channel (always 0) * bit 7: hardware owned */ - unsigned char control; + u8 control; } __packed; @@ -101,6 +101,8 @@ struct sgdma_csr { u32 pad3[3]; }; +#define sgdma_csroffs(a) (offsetof(struct sgdma_csr, a)) +#define sgdma_descroffs(a) (offsetof(struct sgdma_descrip, a)) #define SGDMA_STSREG_ERR BIT(0) /* Error */ #define SGDMA_STSREG_EOP BIT(1) /* EOP */ diff --git a/drivers/net/ethernet/altera/altera_tse.h b/drivers/net/ethernet/altera/altera_tse.h index 465c4aabebbd..2adb24d4523c 100644 --- a/drivers/net/ethernet/altera/altera_tse.h +++ b/drivers/net/ethernet/altera/altera_tse.h @@ -357,6 +357,8 @@ struct altera_tse_mac { u32 reserved5[42]; }; +#define tse_csroffs(a) (offsetof(struct altera_tse_mac, a)) + /* Transmit and Receive Command Registers Bit Definitions */ #define ALTERA_TSE_TX_CMD_STAT_OMIT_CRC BIT(17) @@ -487,4 +489,49 @@ struct altera_tse_private { */ void altera_tse_set_ethtool_ops(struct net_device *); +static inline +u32 csrrd32(void __iomem *mac, size_t offs) +{ + void __iomem *paddr = (void __iomem *)((uintptr_t)mac + offs); + return readl(paddr); +} + +static inline +u16 csrrd16(void __iomem *mac, size_t offs) +{ + void __iomem *paddr = (void __iomem *)((uintptr_t)mac + offs); + return readw(paddr); +} + +static inline +u8 csrrd8(void __iomem *mac, size_t offs) +{ + void __iomem *paddr = (void __iomem *)((uintptr_t)mac + offs); + return readb(paddr); +} + +static inline +void csrwr32(u32 val, void __iomem *mac, size_t offs) +{ + void __iomem *paddr = (void __iomem *)((uintptr_t)mac + offs); + + writel(val, paddr); +} + +static inline +void csrwr16(u16 val, void __iomem *mac, size_t offs) +{ + void __iomem *paddr = (void __iomem *)((uintptr_t)mac + offs); + + writew(val, paddr); +} + +static inline +void csrwr8(u8 val, void __iomem *mac, size_t offs) +{ + void __iomem *paddr = (void __iomem *)((uintptr_t)mac + offs); + + writeb(val, paddr); +} + #endif /* __ALTERA_TSE_H__ */ diff --git a/drivers/net/ethernet/altera/altera_tse_ethtool.c b/drivers/net/ethernet/altera/altera_tse_ethtool.c index 76133caffa78..54c25eff7952 100644 --- a/drivers/net/ethernet/altera/altera_tse_ethtool.c +++ b/drivers/net/ethernet/altera/altera_tse_ethtool.c @@ -96,54 +96,89 @@ static void tse_fill_stats(struct net_device *dev, struct ethtool_stats *dummy, u64 *buf) { struct altera_tse_private *priv = netdev_priv(dev); - struct altera_tse_mac *mac = priv->mac_dev; u64 ext; - buf[0] = ioread32(&mac->frames_transmitted_ok); - buf[1] = ioread32(&mac->frames_received_ok); - buf[2] = ioread32(&mac->frames_check_sequence_errors); - buf[3] = ioread32(&mac->alignment_errors); + buf[0] = csrrd32(priv->mac_dev, + tse_csroffs(frames_transmitted_ok)); + buf[1] = csrrd32(priv->mac_dev, + tse_csroffs(frames_received_ok)); + buf[2] = csrrd32(priv->mac_dev, + tse_csroffs(frames_check_sequence_errors)); + buf[3] = csrrd32(priv->mac_dev, + tse_csroffs(alignment_errors)); /* Extended aOctetsTransmittedOK counter */ - ext = (u64) ioread32(&mac->msb_octets_transmitted_ok) << 32; - ext |= ioread32(&mac->octets_transmitted_ok); + ext = (u64) csrrd32(priv->mac_dev, + tse_csroffs(msb_octets_transmitted_ok)) << 32; + + ext |= csrrd32(priv->mac_dev, + tse_csroffs(octets_transmitted_ok)); buf[4] = ext; /* Extended aOctetsReceivedOK counter */ - ext = (u64) ioread32(&mac->msb_octets_received_ok) << 32; - ext |= ioread32(&mac->octets_received_ok); + ext = (u64) csrrd32(priv->mac_dev, + tse_csroffs(msb_octets_received_ok)) << 32; + + ext |= csrrd32(priv->mac_dev, + tse_csroffs(octets_received_ok)); buf[5] = ext; - buf[6] = ioread32(&mac->tx_pause_mac_ctrl_frames); - buf[7] = ioread32(&mac->rx_pause_mac_ctrl_frames); - buf[8] = ioread32(&mac->if_in_errors); - buf[9] = ioread32(&mac->if_out_errors); - buf[10] = ioread32(&mac->if_in_ucast_pkts); - buf[11] = ioread32(&mac->if_in_multicast_pkts); - buf[12] = ioread32(&mac->if_in_broadcast_pkts); - buf[13] = ioread32(&mac->if_out_discards); - buf[14] = ioread32(&mac->if_out_ucast_pkts); - buf[15] = ioread32(&mac->if_out_multicast_pkts); - buf[16] = ioread32(&mac->if_out_broadcast_pkts); - buf[17] = ioread32(&mac->ether_stats_drop_events); + buf[6] = csrrd32(priv->mac_dev, + tse_csroffs(tx_pause_mac_ctrl_frames)); + buf[7] = csrrd32(priv->mac_dev, + tse_csroffs(rx_pause_mac_ctrl_frames)); + buf[8] = csrrd32(priv->mac_dev, + tse_csroffs(if_in_errors)); + buf[9] = csrrd32(priv->mac_dev, + tse_csroffs(if_out_errors)); + buf[10] = csrrd32(priv->mac_dev, + tse_csroffs(if_in_ucast_pkts)); + buf[11] = csrrd32(priv->mac_dev, + tse_csroffs(if_in_multicast_pkts)); + buf[12] = csrrd32(priv->mac_dev, + tse_csroffs(if_in_broadcast_pkts)); + buf[13] = csrrd32(priv->mac_dev, + tse_csroffs(if_out_discards)); + buf[14] = csrrd32(priv->mac_dev, + tse_csroffs(if_out_ucast_pkts)); + buf[15] = csrrd32(priv->mac_dev, + tse_csroffs(if_out_multicast_pkts)); + buf[16] = csrrd32(priv->mac_dev, + tse_csroffs(if_out_broadcast_pkts)); + buf[17] = csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_drop_events)); /* Extended etherStatsOctets counter */ - ext = (u64) ioread32(&mac->msb_ether_stats_octets) << 32; - ext |= ioread32(&mac->ether_stats_octets); + ext = (u64) csrrd32(priv->mac_dev, + tse_csroffs(msb_ether_stats_octets)) << 32; + ext |= csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_octets)); buf[18] = ext; - buf[19] = ioread32(&mac->ether_stats_pkts); - buf[20] = ioread32(&mac->ether_stats_undersize_pkts); - buf[21] = ioread32(&mac->ether_stats_oversize_pkts); - buf[22] = ioread32(&mac->ether_stats_pkts_64_octets); - buf[23] = ioread32(&mac->ether_stats_pkts_65to127_octets); - buf[24] = ioread32(&mac->ether_stats_pkts_128to255_octets); - buf[25] = ioread32(&mac->ether_stats_pkts_256to511_octets); - buf[26] = ioread32(&mac->ether_stats_pkts_512to1023_octets); - buf[27] = ioread32(&mac->ether_stats_pkts_1024to1518_octets); - buf[28] = ioread32(&mac->ether_stats_pkts_1519tox_octets); - buf[29] = ioread32(&mac->ether_stats_jabbers); - buf[30] = ioread32(&mac->ether_stats_fragments); + buf[19] = csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_pkts)); + buf[20] = csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_undersize_pkts)); + buf[21] = csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_oversize_pkts)); + buf[22] = csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_pkts_64_octets)); + buf[23] = csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_pkts_65to127_octets)); + buf[24] = csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_pkts_128to255_octets)); + buf[25] = csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_pkts_256to511_octets)); + buf[26] = csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_pkts_512to1023_octets)); + buf[27] = csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_pkts_1024to1518_octets)); + buf[28] = csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_pkts_1519tox_octets)); + buf[29] = csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_jabbers)); + buf[30] = csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_fragments)); } static int tse_sset_count(struct net_device *dev, int sset) @@ -178,7 +213,6 @@ static void tse_get_regs(struct net_device *dev, struct ethtool_regs *regs, { int i; struct altera_tse_private *priv = netdev_priv(dev); - u32 *tse_mac_regs = (u32 *)priv->mac_dev; u32 *buf = regbuf; /* Set version to a known value, so ethtool knows @@ -196,7 +230,7 @@ static void tse_get_regs(struct net_device *dev, struct ethtool_regs *regs, regs->version = 1; for (i = 0; i < TSE_NUM_REGS; i++) - buf[i] = ioread32(&tse_mac_regs[i]); + buf[i] = csrrd32(priv->mac_dev, i * 4); } static int tse_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index e44a4aeb9701..ecc3b4e4588b 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -100,29 +100,30 @@ static inline u32 tse_tx_avail(struct altera_tse_private *priv) */ static int altera_tse_mdio_read(struct mii_bus *bus, int mii_id, int regnum) { - struct altera_tse_mac *mac = (struct altera_tse_mac *)bus->priv; - unsigned int *mdio_regs = (unsigned int *)&mac->mdio_phy0; - u32 data; + struct net_device *ndev = bus->priv; + struct altera_tse_private *priv = netdev_priv(ndev); /* set MDIO address */ - iowrite32((mii_id & 0x1f), &mac->mdio_phy0_addr); + csrwr32((mii_id & 0x1f), priv->mac_dev, + tse_csroffs(mdio_phy0_addr)); /* get the data */ - data = ioread32(&mdio_regs[regnum]) & 0xffff; - return data; + return csrrd32(priv->mac_dev, + tse_csroffs(mdio_phy0) + regnum * 4) & 0xffff; } static int altera_tse_mdio_write(struct mii_bus *bus, int mii_id, int regnum, u16 value) { - struct altera_tse_mac *mac = (struct altera_tse_mac *)bus->priv; - unsigned int *mdio_regs = (unsigned int *)&mac->mdio_phy0; + struct net_device *ndev = bus->priv; + struct altera_tse_private *priv = netdev_priv(ndev); /* set MDIO address */ - iowrite32((mii_id & 0x1f), &mac->mdio_phy0_addr); + csrwr32((mii_id & 0x1f), priv->mac_dev, + tse_csroffs(mdio_phy0_addr)); /* write the data */ - iowrite32((u32) value, &mdio_regs[regnum]); + csrwr32(value, priv->mac_dev, tse_csroffs(mdio_phy0) + regnum * 4); return 0; } @@ -168,7 +169,7 @@ static int altera_tse_mdio_create(struct net_device *dev, unsigned int id) for (i = 0; i < PHY_MAX_ADDR; i++) mdio->irq[i] = PHY_POLL; - mdio->priv = priv->mac_dev; + mdio->priv = dev; mdio->parent = priv->device; ret = of_mdiobus_register(mdio, mdio_node); @@ -563,7 +564,6 @@ static int tse_start_xmit(struct sk_buff *skb, struct net_device *dev) unsigned int nopaged_len = skb_headlen(skb); enum netdev_tx ret = NETDEV_TX_OK; dma_addr_t dma_addr; - int txcomplete = 0; spin_lock_bh(&priv->tx_lock); @@ -599,7 +599,7 @@ static int tse_start_xmit(struct sk_buff *skb, struct net_device *dev) dma_sync_single_for_device(priv->device, buffer->dma_addr, buffer->len, DMA_TO_DEVICE); - txcomplete = priv->dmaops->tx_buffer(priv, buffer); + priv->dmaops->tx_buffer(priv, buffer); skb_tx_timestamp(skb); @@ -698,7 +698,6 @@ static struct phy_device *connect_local_phy(struct net_device *dev) struct altera_tse_private *priv = netdev_priv(dev); struct phy_device *phydev = NULL; char phy_id_fmt[MII_BUS_ID_SIZE + 3]; - int ret; if (priv->phy_addr != POLL_PHY) { snprintf(phy_id_fmt, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, @@ -712,6 +711,7 @@ static struct phy_device *connect_local_phy(struct net_device *dev) netdev_err(dev, "Could not attach to PHY\n"); } else { + int ret; phydev = phy_find_first(priv->mdio); if (phydev == NULL) { netdev_err(dev, "No PHY found\n"); @@ -791,7 +791,6 @@ static int init_phy(struct net_device *dev) static void tse_update_mac_addr(struct altera_tse_private *priv, u8 *addr) { - struct altera_tse_mac *mac = priv->mac_dev; u32 msb; u32 lsb; @@ -799,8 +798,8 @@ static void tse_update_mac_addr(struct altera_tse_private *priv, u8 *addr) lsb = ((addr[5] << 8) | addr[4]) & 0xffff; /* Set primary MAC address */ - iowrite32(msb, &mac->mac_addr_0); - iowrite32(lsb, &mac->mac_addr_1); + csrwr32(msb, priv->mac_dev, tse_csroffs(mac_addr_0)); + csrwr32(lsb, priv->mac_dev, tse_csroffs(mac_addr_1)); } /* MAC software reset. @@ -811,26 +810,26 @@ static void tse_update_mac_addr(struct altera_tse_private *priv, u8 *addr) */ static int reset_mac(struct altera_tse_private *priv) { - void __iomem *cmd_cfg_reg = &priv->mac_dev->command_config; int counter; u32 dat; - dat = ioread32(cmd_cfg_reg); + dat = csrrd32(priv->mac_dev, tse_csroffs(command_config)); dat &= ~(MAC_CMDCFG_TX_ENA | MAC_CMDCFG_RX_ENA); dat |= MAC_CMDCFG_SW_RESET | MAC_CMDCFG_CNT_RESET; - iowrite32(dat, cmd_cfg_reg); + csrwr32(dat, priv->mac_dev, tse_csroffs(command_config)); counter = 0; while (counter++ < ALTERA_TSE_SW_RESET_WATCHDOG_CNTR) { - if (tse_bit_is_clear(cmd_cfg_reg, MAC_CMDCFG_SW_RESET)) + if (tse_bit_is_clear(priv->mac_dev, tse_csroffs(command_config), + MAC_CMDCFG_SW_RESET)) break; udelay(1); } if (counter >= ALTERA_TSE_SW_RESET_WATCHDOG_CNTR) { - dat = ioread32(cmd_cfg_reg); + dat = csrrd32(priv->mac_dev, tse_csroffs(command_config)); dat &= ~MAC_CMDCFG_SW_RESET; - iowrite32(dat, cmd_cfg_reg); + csrwr32(dat, priv->mac_dev, tse_csroffs(command_config)); return -1; } return 0; @@ -840,41 +839,57 @@ static int reset_mac(struct altera_tse_private *priv) */ static int init_mac(struct altera_tse_private *priv) { - struct altera_tse_mac *mac = priv->mac_dev; unsigned int cmd = 0; u32 frm_length; /* Setup Rx FIFO */ - iowrite32(priv->rx_fifo_depth - ALTERA_TSE_RX_SECTION_EMPTY, - &mac->rx_section_empty); - iowrite32(ALTERA_TSE_RX_SECTION_FULL, &mac->rx_section_full); - iowrite32(ALTERA_TSE_RX_ALMOST_EMPTY, &mac->rx_almost_empty); - iowrite32(ALTERA_TSE_RX_ALMOST_FULL, &mac->rx_almost_full); + csrwr32(priv->rx_fifo_depth - ALTERA_TSE_RX_SECTION_EMPTY, + priv->mac_dev, tse_csroffs(rx_section_empty)); + + csrwr32(ALTERA_TSE_RX_SECTION_FULL, priv->mac_dev, + tse_csroffs(rx_section_full)); + + csrwr32(ALTERA_TSE_RX_ALMOST_EMPTY, priv->mac_dev, + tse_csroffs(rx_almost_empty)); + + csrwr32(ALTERA_TSE_RX_ALMOST_FULL, priv->mac_dev, + tse_csroffs(rx_almost_full)); /* Setup Tx FIFO */ - iowrite32(priv->tx_fifo_depth - ALTERA_TSE_TX_SECTION_EMPTY, - &mac->tx_section_empty); - iowrite32(ALTERA_TSE_TX_SECTION_FULL, &mac->tx_section_full); - iowrite32(ALTERA_TSE_TX_ALMOST_EMPTY, &mac->tx_almost_empty); - iowrite32(ALTERA_TSE_TX_ALMOST_FULL, &mac->tx_almost_full); + csrwr32(priv->tx_fifo_depth - ALTERA_TSE_TX_SECTION_EMPTY, + priv->mac_dev, tse_csroffs(tx_section_empty)); + + csrwr32(ALTERA_TSE_TX_SECTION_FULL, priv->mac_dev, + tse_csroffs(tx_section_full)); + + csrwr32(ALTERA_TSE_TX_ALMOST_EMPTY, priv->mac_dev, + tse_csroffs(tx_almost_empty)); + + csrwr32(ALTERA_TSE_TX_ALMOST_FULL, priv->mac_dev, + tse_csroffs(tx_almost_full)); /* MAC Address Configuration */ tse_update_mac_addr(priv, priv->dev->dev_addr); /* MAC Function Configuration */ frm_length = ETH_HLEN + priv->dev->mtu + ETH_FCS_LEN; - iowrite32(frm_length, &mac->frm_length); - iowrite32(ALTERA_TSE_TX_IPG_LENGTH, &mac->tx_ipg_length); + csrwr32(frm_length, priv->mac_dev, tse_csroffs(frm_length)); + + csrwr32(ALTERA_TSE_TX_IPG_LENGTH, priv->mac_dev, + tse_csroffs(tx_ipg_length)); /* Disable RX/TX shift 16 for alignment of all received frames on 16-bit * start address */ - tse_set_bit(&mac->rx_cmd_stat, ALTERA_TSE_RX_CMD_STAT_RX_SHIFT16); - tse_clear_bit(&mac->tx_cmd_stat, ALTERA_TSE_TX_CMD_STAT_TX_SHIFT16 | - ALTERA_TSE_TX_CMD_STAT_OMIT_CRC); + tse_set_bit(priv->mac_dev, tse_csroffs(rx_cmd_stat), + ALTERA_TSE_RX_CMD_STAT_RX_SHIFT16); + + tse_clear_bit(priv->mac_dev, tse_csroffs(tx_cmd_stat), + ALTERA_TSE_TX_CMD_STAT_TX_SHIFT16 | + ALTERA_TSE_TX_CMD_STAT_OMIT_CRC); /* Set the MAC options */ - cmd = ioread32(&mac->command_config); + cmd = csrrd32(priv->mac_dev, tse_csroffs(command_config)); cmd &= ~MAC_CMDCFG_PAD_EN; /* No padding Removal on Receive */ cmd &= ~MAC_CMDCFG_CRC_FWD; /* CRC Removal */ cmd |= MAC_CMDCFG_RX_ERR_DISC; /* Automatically discard frames @@ -889,9 +904,10 @@ static int init_mac(struct altera_tse_private *priv) cmd &= ~MAC_CMDCFG_ETH_SPEED; cmd &= ~MAC_CMDCFG_ENA_10; - iowrite32(cmd, &mac->command_config); + csrwr32(cmd, priv->mac_dev, tse_csroffs(command_config)); - iowrite32(ALTERA_TSE_PAUSE_QUANTA, &mac->pause_quanta); + csrwr32(ALTERA_TSE_PAUSE_QUANTA, priv->mac_dev, + tse_csroffs(pause_quanta)); if (netif_msg_hw(priv)) dev_dbg(priv->device, @@ -904,15 +920,14 @@ static int init_mac(struct altera_tse_private *priv) */ static void tse_set_mac(struct altera_tse_private *priv, bool enable) { - struct altera_tse_mac *mac = priv->mac_dev; - u32 value = ioread32(&mac->command_config); + u32 value = csrrd32(priv->mac_dev, tse_csroffs(command_config)); if (enable) value |= MAC_CMDCFG_TX_ENA | MAC_CMDCFG_RX_ENA; else value &= ~(MAC_CMDCFG_TX_ENA | MAC_CMDCFG_RX_ENA); - iowrite32(value, &mac->command_config); + csrwr32(value, priv->mac_dev, tse_csroffs(command_config)); } /* Change the MTU @@ -942,13 +957,12 @@ static int tse_change_mtu(struct net_device *dev, int new_mtu) static void altera_tse_set_mcfilter(struct net_device *dev) { struct altera_tse_private *priv = netdev_priv(dev); - struct altera_tse_mac *mac = priv->mac_dev; int i; struct netdev_hw_addr *ha; /* clear the hash filter */ for (i = 0; i < 64; i++) - iowrite32(0, &(mac->hash_table[i])); + csrwr32(0, priv->mac_dev, tse_csroffs(hash_table) + i * 4); netdev_for_each_mc_addr(ha, dev) { unsigned int hash = 0; @@ -964,7 +978,7 @@ static void altera_tse_set_mcfilter(struct net_device *dev) hash = (hash << 1) | xor_bit; } - iowrite32(1, &(mac->hash_table[hash])); + csrwr32(1, priv->mac_dev, tse_csroffs(hash_table) + hash * 4); } } @@ -972,12 +986,11 @@ static void altera_tse_set_mcfilter(struct net_device *dev) static void altera_tse_set_mcfilterall(struct net_device *dev) { struct altera_tse_private *priv = netdev_priv(dev); - struct altera_tse_mac *mac = priv->mac_dev; int i; /* set the hash filter */ for (i = 0; i < 64; i++) - iowrite32(1, &(mac->hash_table[i])); + csrwr32(1, priv->mac_dev, tse_csroffs(hash_table) + i * 4); } /* Set or clear the multicast filter for this adaptor @@ -985,12 +998,12 @@ static void altera_tse_set_mcfilterall(struct net_device *dev) static void tse_set_rx_mode_hashfilter(struct net_device *dev) { struct altera_tse_private *priv = netdev_priv(dev); - struct altera_tse_mac *mac = priv->mac_dev; spin_lock(&priv->mac_cfg_lock); if (dev->flags & IFF_PROMISC) - tse_set_bit(&mac->command_config, MAC_CMDCFG_PROMIS_EN); + tse_set_bit(priv->mac_dev, tse_csroffs(command_config), + MAC_CMDCFG_PROMIS_EN); if (dev->flags & IFF_ALLMULTI) altera_tse_set_mcfilterall(dev); @@ -1005,15 +1018,16 @@ static void tse_set_rx_mode_hashfilter(struct net_device *dev) static void tse_set_rx_mode(struct net_device *dev) { struct altera_tse_private *priv = netdev_priv(dev); - struct altera_tse_mac *mac = priv->mac_dev; spin_lock(&priv->mac_cfg_lock); if ((dev->flags & IFF_PROMISC) || (dev->flags & IFF_ALLMULTI) || !netdev_mc_empty(dev) || !netdev_uc_empty(dev)) - tse_set_bit(&mac->command_config, MAC_CMDCFG_PROMIS_EN); + tse_set_bit(priv->mac_dev, tse_csroffs(command_config), + MAC_CMDCFG_PROMIS_EN); else - tse_clear_bit(&mac->command_config, MAC_CMDCFG_PROMIS_EN); + tse_clear_bit(priv->mac_dev, tse_csroffs(command_config), + MAC_CMDCFG_PROMIS_EN); spin_unlock(&priv->mac_cfg_lock); } @@ -1493,7 +1507,7 @@ static int altera_tse_remove(struct platform_device *pdev) return 0; } -struct altera_dmaops altera_dtype_sgdma = { +static const struct altera_dmaops altera_dtype_sgdma = { .altera_dtype = ALTERA_DTYPE_SGDMA, .dmamask = 32, .reset_dma = sgdma_reset, @@ -1512,7 +1526,7 @@ struct altera_dmaops altera_dtype_sgdma = { .start_rxdma = sgdma_start_rxdma, }; -struct altera_dmaops altera_dtype_msgdma = { +static const struct altera_dmaops altera_dtype_msgdma = { .altera_dtype = ALTERA_DTYPE_MSGDMA, .dmamask = 64, .reset_dma = msgdma_reset, diff --git a/drivers/net/ethernet/altera/altera_utils.c b/drivers/net/ethernet/altera/altera_utils.c index 70fa13f486b2..d7eeb1713ad2 100644 --- a/drivers/net/ethernet/altera/altera_utils.c +++ b/drivers/net/ethernet/altera/altera_utils.c @@ -17,28 +17,28 @@ #include "altera_tse.h" #include "altera_utils.h" -void tse_set_bit(void __iomem *ioaddr, u32 bit_mask) +void tse_set_bit(void __iomem *ioaddr, size_t offs, u32 bit_mask) { - u32 value = ioread32(ioaddr); + u32 value = csrrd32(ioaddr, offs); value |= bit_mask; - iowrite32(value, ioaddr); + csrwr32(value, ioaddr, offs); } -void tse_clear_bit(void __iomem *ioaddr, u32 bit_mask) +void tse_clear_bit(void __iomem *ioaddr, size_t offs, u32 bit_mask) { - u32 value = ioread32(ioaddr); + u32 value = csrrd32(ioaddr, offs); value &= ~bit_mask; - iowrite32(value, ioaddr); + csrwr32(value, ioaddr, offs); } -int tse_bit_is_set(void __iomem *ioaddr, u32 bit_mask) +int tse_bit_is_set(void __iomem *ioaddr, size_t offs, u32 bit_mask) { - u32 value = ioread32(ioaddr); + u32 value = csrrd32(ioaddr, offs); return (value & bit_mask) ? 1 : 0; } -int tse_bit_is_clear(void __iomem *ioaddr, u32 bit_mask) +int tse_bit_is_clear(void __iomem *ioaddr, size_t offs, u32 bit_mask) { - u32 value = ioread32(ioaddr); + u32 value = csrrd32(ioaddr, offs); return (value & bit_mask) ? 0 : 1; } diff --git a/drivers/net/ethernet/altera/altera_utils.h b/drivers/net/ethernet/altera/altera_utils.h index ce1db36d3583..baf100ccf587 100644 --- a/drivers/net/ethernet/altera/altera_utils.h +++ b/drivers/net/ethernet/altera/altera_utils.h @@ -19,9 +19,9 @@ #ifndef __ALTERA_UTILS_H__ #define __ALTERA_UTILS_H__ -void tse_set_bit(void __iomem *ioaddr, u32 bit_mask); -void tse_clear_bit(void __iomem *ioaddr, u32 bit_mask); -int tse_bit_is_set(void __iomem *ioaddr, u32 bit_mask); -int tse_bit_is_clear(void __iomem *ioaddr, u32 bit_mask); +void tse_set_bit(void __iomem *ioaddr, size_t offs, u32 bit_mask); +void tse_clear_bit(void __iomem *ioaddr, size_t offs, u32 bit_mask); +int tse_bit_is_set(void __iomem *ioaddr, size_t offs, u32 bit_mask); +int tse_bit_is_clear(void __iomem *ioaddr, size_t offs, u32 bit_mask); #endif /* __ALTERA_UTILS_H__*/ From d91e5c02555ae838e9d783b9e83e125fd10dca73 Mon Sep 17 00:00:00 2001 From: Vince Bridgers Date: Wed, 14 May 2014 14:38:37 -0500 Subject: [PATCH 099/178] Altera TSE: Disable Multicast filtering to workaround problem This patch disables multicast hash filtering if present in the hardware and uses promiscuous mode instead until the problem with multicast filtering has been debugged, integrated and tested. Signed-off-by: Vince Bridgers Signed-off-by: David S. Miller --- drivers/net/ethernet/altera/altera_tse_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index ecc3b4e4588b..7330681574d2 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -1376,6 +1376,11 @@ static int altera_tse_probe(struct platform_device *pdev) of_property_read_bool(pdev->dev.of_node, "altr,has-hash-multicast-filter"); + /* Set hash filter to not set for now until the + * multicast filter receive issue is debugged + */ + priv->hash_filter = 0; + /* get supplemental address settings for this instance */ priv->added_unicast = of_property_read_bool(pdev->dev.of_node, From b394745df2d9d4c30bf1bcc55773bec6f3bc7c67 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 14 May 2014 13:12:49 -0700 Subject: [PATCH 100/178] net: phy: Don't call phy_resume if phy_init_hw failed After the call to phy_init_hw failed in phy_attach_direct, phy_detach is called to detach the phy device from its network device. If the attached driver is a generic phy driver, this also detaches the driver. Subsequently phy_resume is called, which assumes without checking that a driver is attached to the device. This will result in a crash such as Unable to handle kernel paging request for data at address 0xffffffffffffff90 Faulting instruction address: 0xc0000000003a0e18 Oops: Kernel access of bad area, sig: 11 [#1] ... NIP [c0000000003a0e18] .phy_attach_direct+0x68/0x17c LR [c0000000003a0e6c] .phy_attach_direct+0xbc/0x17c Call Trace: [c0000003fc0475d0] [c0000000003a0e6c] .phy_attach_direct+0xbc/0x17c (unreliable) [c0000003fc047670] [c0000000003a0ff8] .phy_connect_direct+0x28/0x98 [c0000003fc047700] [c0000000003f0074] .of_phy_connect+0x4c/0xa4 Only call phy_resume if phy_init_hw was successful. Signed-off-by: Guenter Roeck Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 0ce606624296..4987a1c6dc52 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -614,8 +614,8 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, err = phy_init_hw(phydev); if (err) phy_detach(phydev); - - phy_resume(phydev); + else + phy_resume(phydev); return err; } From be7a010d6fa33dca9327ad8e91844278dfd1e712 Mon Sep 17 00:00:00 2001 From: Duan Jiong Date: Thu, 15 May 2014 15:56:14 +0800 Subject: [PATCH 101/178] ipv6: update Destination Cache entries when gateway turn into host RFC 4861 states in 7.2.5: The IsRouter flag in the cache entry MUST be set based on the Router flag in the received advertisement. In those cases where the IsRouter flag changes from TRUE to FALSE as a result of this update, the node MUST remove that router from the Default Router List and update the Destination Cache entries for all destinations using that neighbor as a router as specified in Section 7.3.3. This is needed to detect when a node that is used as a router stops forwarding packets due to being configured as a host. Currently, when dealing with NA Message which IsRouter flag changes from TRUE to FALSE, the kernel only removes router from the Default Router List, and don't update the Destination Cache entries. Now in order to update those Destination Cache entries, i introduce function rt6_clean_tohost(). Signed-off-by: Duan Jiong Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/ip6_route.h | 1 + net/ipv6/ndisc.c | 7 ++----- net/ipv6/route.c | 21 +++++++++++++++++++++ 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 6c4f5eac98e7..216cecce65e9 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -127,6 +127,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg); void rt6_ifdown(struct net *net, struct net_device *dev); void rt6_mtu_change(struct net_device *dev, unsigned int mtu); void rt6_remove_prefsrc(struct inet6_ifaddr *ifp); +void rt6_clean_tohost(struct net *net, struct in6_addr *gateway); /* diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 09a22f4f36c9..ca8d4ea48a5d 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -851,7 +851,7 @@ out: static void ndisc_recv_na(struct sk_buff *skb) { struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb); - const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; + struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; u8 *lladdr = NULL; u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + @@ -944,10 +944,7 @@ static void ndisc_recv_na(struct sk_buff *skb) /* * Change: router to host */ - struct rt6_info *rt; - rt = rt6_get_dflt_router(saddr, dev); - if (rt) - ip6_del_rt(rt); + rt6_clean_tohost(dev_net(dev), saddr); } out: diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 004fffb6c221..58c449ad7f6e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2234,6 +2234,27 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) fib6_clean_all(net, fib6_remove_prefsrc, &adni); } +#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY) +#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE) + +/* Remove routers and update dst entries when gateway turn into host. */ +static int fib6_clean_tohost(struct rt6_info *rt, void *arg) +{ + struct in6_addr *gateway = (struct in6_addr *)arg; + + if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) || + ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) && + ipv6_addr_equal(gateway, &rt->rt6i_gateway)) { + return -1; + } + return 0; +} + +void rt6_clean_tohost(struct net *net, struct in6_addr *gateway) +{ + fib6_clean_all(net, fib6_clean_tohost, gateway); +} + struct arg_dev_net { struct net_device *dev; struct net *net; From 583757446ba6850eff96cef6565d729266da9c5b Mon Sep 17 00:00:00 2001 From: Zoltan Kiss Date: Thu, 15 May 2014 11:08:34 +0100 Subject: [PATCH 102/178] xen-netback: Fix grant ref resolution in RX path The original series for reintroducing grant mapping for netback had a patch [1] to handle receiving of packets from an another VIF. Grant copy on the receiving side needs the grant ref of the page to set up the op. The original patch assumed (wrongly) that the frags array haven't changed. In the case reported by Sander, the sending guest sent a packet where the linear buffer and the first frag were under PKT_PROT_LEN (=128) bytes. xenvif_tx_submit() then pulled up the linear area to 128 bytes, and ditched the first frag. The receiving side had an off-by-one problem when gathered the grant refs. This patch fixes that by checking whether the actual frag's page pointer is the same as the page in the original frag list. It can handle any kind of changes on the original frags array, like: - removing granted frags from the array at any point - adding local pages to the frags list anywhere - reordering the frags It's optimized to the most common case, when there is 1:1 relation between the frags and the list, plus works optimal when frags are removed from the end or the beginning. [1]: 3e2234: xen-netback: Handle foreign mapped pages on the guest RX path Reported-by: Sander Eikelenboom Signed-off-by: Zoltan Kiss Acked-by: Ian Campbell Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 98 +++++++++++++++++++++++++------ 1 file changed, 80 insertions(+), 18 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 76665405c5aa..64ab1d141f1c 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -104,7 +104,7 @@ static inline unsigned long idx_to_kaddr(struct xenvif *vif, /* Find the containing VIF's structure from a pointer in pending_tx_info array */ -static inline struct xenvif* ubuf_to_vif(struct ubuf_info *ubuf) +static inline struct xenvif *ubuf_to_vif(const struct ubuf_info *ubuf) { u16 pending_idx = ubuf->desc; struct pending_tx_info *temp = @@ -322,6 +322,35 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb, } } +/* + * Find the grant ref for a given frag in a chain of struct ubuf_info's + * skb: the skb itself + * i: the frag's number + * ubuf: a pointer to an element in the chain. It should not be NULL + * + * Returns a pointer to the element in the chain where the page were found. If + * not found, returns NULL. + * See the definition of callback_struct in common.h for more details about + * the chain. + */ +static const struct ubuf_info *xenvif_find_gref(const struct sk_buff *const skb, + const int i, + const struct ubuf_info *ubuf) +{ + struct xenvif *foreign_vif = ubuf_to_vif(ubuf); + + do { + u16 pending_idx = ubuf->desc; + + if (skb_shinfo(skb)->frags[i].page.p == + foreign_vif->mmap_pages[pending_idx]) + break; + ubuf = (struct ubuf_info *) ubuf->ctx; + } while (ubuf); + + return ubuf; +} + /* * Prepare an SKB to be transmitted to the frontend. * @@ -346,9 +375,8 @@ static int xenvif_gop_skb(struct sk_buff *skb, int head = 1; int old_meta_prod; int gso_type; - struct ubuf_info *ubuf = skb_shinfo(skb)->destructor_arg; - grant_ref_t foreign_grefs[MAX_SKB_FRAGS]; - struct xenvif *foreign_vif = NULL; + const struct ubuf_info *ubuf = skb_shinfo(skb)->destructor_arg; + const struct ubuf_info *const head_ubuf = ubuf; old_meta_prod = npo->meta_prod; @@ -386,19 +414,6 @@ static int xenvif_gop_skb(struct sk_buff *skb, npo->copy_off = 0; npo->copy_gref = req->gref; - if ((skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) && - (ubuf->callback == &xenvif_zerocopy_callback)) { - int i = 0; - foreign_vif = ubuf_to_vif(ubuf); - - do { - u16 pending_idx = ubuf->desc; - foreign_grefs[i++] = - foreign_vif->pending_tx_info[pending_idx].req.gref; - ubuf = (struct ubuf_info *) ubuf->ctx; - } while (ubuf); - } - data = skb->data; while (data < skb_tail_pointer(skb)) { unsigned int offset = offset_in_page(data); @@ -415,13 +430,60 @@ static int xenvif_gop_skb(struct sk_buff *skb, } for (i = 0; i < nr_frags; i++) { + /* This variable also signals whether foreign_gref has a real + * value or not. + */ + struct xenvif *foreign_vif = NULL; + grant_ref_t foreign_gref; + + if ((skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) && + (ubuf->callback == &xenvif_zerocopy_callback)) { + const struct ubuf_info *const startpoint = ubuf; + + /* Ideally ubuf points to the chain element which + * belongs to this frag. Or if frags were removed from + * the beginning, then shortly before it. + */ + ubuf = xenvif_find_gref(skb, i, ubuf); + + /* Try again from the beginning of the list, if we + * haven't tried from there. This only makes sense in + * the unlikely event of reordering the original frags. + * For injected local pages it's an unnecessary second + * run. + */ + if (unlikely(!ubuf) && startpoint != head_ubuf) + ubuf = xenvif_find_gref(skb, i, head_ubuf); + + if (likely(ubuf)) { + u16 pending_idx = ubuf->desc; + + foreign_vif = ubuf_to_vif(ubuf); + foreign_gref = foreign_vif->pending_tx_info[pending_idx].req.gref; + /* Just a safety measure. If this was the last + * element on the list, the for loop will + * iterate again if a local page were added to + * the end. Using head_ubuf here prevents the + * second search on the chain. Or the original + * frags changed order, but that's less likely. + * In any way, ubuf shouldn't be NULL. + */ + ubuf = ubuf->ctx ? + (struct ubuf_info *) ubuf->ctx : + head_ubuf; + } else + /* This frag was a local page, added to the + * array after the skb left netback. + */ + ubuf = head_ubuf; + } xenvif_gop_frag_copy(vif, skb, npo, skb_frag_page(&skb_shinfo(skb)->frags[i]), skb_frag_size(&skb_shinfo(skb)->frags[i]), skb_shinfo(skb)->frags[i].page_offset, &head, foreign_vif, - foreign_grefs[i]); + foreign_vif ? foreign_gref : UINT_MAX); } return npo->meta_prod - old_meta_prod; From 81c708068dfedece038e07d818ba68333d8d885d Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 15 May 2014 13:35:23 +0200 Subject: [PATCH 103/178] bonding: fix out of range parameters for bond_intmax_tbl I've missed to add a NULL entry to the bond_intmax_tbl when I introduced it with the conversion of arp_interval so add it now. CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek Fixes: 7bdb04ed0dbf ("bonding: convert arp_interval to use the new option API") Signed-off-by: Nikolay Aleksandrov Acked-by: Veaceslav Falico Signed-off-by: David S. Miller --- drivers/net/bonding/bond_options.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 724e30fa20b9..832070298446 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -125,6 +125,7 @@ static const struct bond_opt_value bond_fail_over_mac_tbl[] = { static const struct bond_opt_value bond_intmax_tbl[] = { { "off", 0, BOND_VALFLAG_DEFAULT}, { "maxval", INT_MAX, BOND_VALFLAG_MAX}, + { NULL, -1, 0} }; static const struct bond_opt_value bond_lacp_rate_tbl[] = { From ce8d9e0d6746ff67c1870386b7121a4448f21130 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 15 May 2014 15:29:27 +0300 Subject: [PATCH 104/178] net/mlx4_core: Add UPDATE_QP SRIOV wrapper support This patch adds UPDATE_QP SRIOV wrapper support. The mechanism is a general one, but currently only source MAC index changes are allowed for VFs. Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 4 +- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 6 +++ drivers/net/ethernet/mellanox/mlx4/qp.c | 35 ++++++++++++ .../ethernet/mellanox/mlx4/resource_tracker.c | 54 +++++++++++++++++++ include/linux/mlx4/qp.h | 11 ++++ 5 files changed, 108 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 78099eab7673..92d3249f63f1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1253,12 +1253,12 @@ static struct mlx4_cmd_info cmd_info[] = { }, { .opcode = MLX4_CMD_UPDATE_QP, - .has_inbox = false, + .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_CMD_EPERM_wrapper + .wrapper = mlx4_UPDATE_QP_wrapper }, { .opcode = MLX4_CMD_GET_OP_REQ, diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index f9c465101963..212cea440f90 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1195,6 +1195,12 @@ int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); +int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); + int mlx4_PROMISC_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 61d64ebffd56..fbd32af89c7c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -389,6 +389,41 @@ err_icm: EXPORT_SYMBOL_GPL(mlx4_qp_alloc); +#define MLX4_UPDATE_QP_SUPPORTED_ATTRS MLX4_UPDATE_QP_SMAC +int mlx4_update_qp(struct mlx4_dev *dev, struct mlx4_qp *qp, + enum mlx4_update_qp_attr attr, + struct mlx4_update_qp_params *params) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_update_qp_context *cmd; + u64 pri_addr_path_mask = 0; + int err = 0; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + cmd = (struct mlx4_update_qp_context *)mailbox->buf; + + if (!attr || (attr & ~MLX4_UPDATE_QP_SUPPORTED_ATTRS)) + return -EINVAL; + + if (attr & MLX4_UPDATE_QP_SMAC) { + pri_addr_path_mask |= 1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX; + cmd->qp_context.pri_path.grh_mylmc = params->smac_index; + } + + cmd->primary_addr_path_mask = cpu_to_be64(pri_addr_path_mask); + + err = mlx4_cmd(dev, mailbox->dma, qp->qpn & 0xffffff, 0, + MLX4_CMD_UPDATE_QP, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_update_qp); + void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp) { struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 1c3fdd4a1f7d..8f1254a79832 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -3895,6 +3895,60 @@ static int add_eth_header(struct mlx4_dev *dev, int slave, } +#define MLX4_UPD_QP_PATH_MASK_SUPPORTED (1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX) +int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd_info) +{ + int err; + u32 qpn = vhcr->in_modifier & 0xffffff; + struct res_qp *rqp; + u64 mac; + unsigned port; + u64 pri_addr_path_mask; + struct mlx4_update_qp_context *cmd; + int smac_index; + + cmd = (struct mlx4_update_qp_context *)inbox->buf; + + pri_addr_path_mask = be64_to_cpu(cmd->primary_addr_path_mask); + if (cmd->qp_mask || cmd->secondary_addr_path_mask || + (pri_addr_path_mask & ~MLX4_UPD_QP_PATH_MASK_SUPPORTED)) + return -EPERM; + + /* Just change the smac for the QP */ + err = get_res(dev, slave, qpn, RES_QP, &rqp); + if (err) { + mlx4_err(dev, "Updating qpn 0x%x for slave %d rejected\n", qpn, slave); + return err; + } + + port = (rqp->sched_queue >> 6 & 1) + 1; + smac_index = cmd->qp_context.pri_path.grh_mylmc; + err = mac_find_smac_ix_in_slave(dev, slave, port, + smac_index, &mac); + if (err) { + mlx4_err(dev, "Failed to update qpn 0x%x, MAC is invalid. smac_ix: %d\n", + qpn, smac_index); + goto err_mac; + } + + err = mlx4_cmd(dev, inbox->dma, + vhcr->in_modifier, 0, + MLX4_CMD_UPDATE_QP, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) { + mlx4_err(dev, "Failed to update qpn on qpn 0x%x, command failed\n", qpn); + goto err_mac; + } + +err_mac: + put_res(dev, slave, qpn, RES_QP); + return err; +} + int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index b66e7610d4ee..7040dc98ff8b 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -421,6 +421,17 @@ struct mlx4_wqe_inline_seg { __be32 byte_count; }; +enum mlx4_update_qp_attr { + MLX4_UPDATE_QP_SMAC = 1 << 0, +}; + +struct mlx4_update_qp_params { + u8 smac_index; +}; + +int mlx4_update_qp(struct mlx4_dev *dev, struct mlx4_qp *qp, + enum mlx4_update_qp_attr attr, + struct mlx4_update_qp_params *params); int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state, struct mlx4_qp_context *context, enum mlx4_qp_optpar optpar, From 9433c188915c1383ee36259119bc3a9c6f98cfc3 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 15 May 2014 15:29:28 +0300 Subject: [PATCH 105/178] IB/mlx4: Invoke UPDATE_QP for proxy QP1 on MAC changes When we receive a netdev event indicating a netdev change and/or a netdev address change, we must change the MAC index used by the proxy QP1 (in the QP context), otherwise RoCE CM packets sent by the VF will not carry the same source MAC address as the non-CM packets. We use the UPDATE_QP command to perform this change. In order to avoid modifying a QP context based on netdev event, while the driver attempts to destroy this QP (e.g either the mlx4_ib or ib_mad modules are unloaded), we use mutex locking in both flows. Since the relevant mlx4 proxy GSI QP is created indirectly by the mad module when they create their GSI QP, the mlx4 didn't need to keep track on that QP prior to this change. Now, when QP modifications are needed to this QP from within the driver, we added refernece to it. Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/main.c | 67 ++++++++++++++++++++++++++-- drivers/infiniband/hw/mlx4/mlx4_ib.h | 3 ++ drivers/infiniband/hw/mlx4/qp.c | 8 ++++ 3 files changed, 75 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 1b6dbe156a37..199c7896f081 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -48,6 +48,7 @@ #include #include +#include #include "mlx4_ib.h" #include "user.h" @@ -1614,6 +1615,53 @@ static int mlx4_ib_inet6_event(struct notifier_block *this, unsigned long event, } #endif +#define MLX4_IB_INVALID_MAC ((u64)-1) +static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev, + struct net_device *dev, + int port) +{ + u64 new_smac = 0; + u64 release_mac = MLX4_IB_INVALID_MAC; + struct mlx4_ib_qp *qp; + + read_lock(&dev_base_lock); + new_smac = mlx4_mac_to_u64(dev->dev_addr); + read_unlock(&dev_base_lock); + + mutex_lock(&ibdev->qp1_proxy_lock[port - 1]); + qp = ibdev->qp1_proxy[port - 1]; + if (qp) { + int new_smac_index; + u64 old_smac = qp->pri.smac; + struct mlx4_update_qp_params update_params; + + if (new_smac == old_smac) + goto unlock; + + new_smac_index = mlx4_register_mac(ibdev->dev, port, new_smac); + + if (new_smac_index < 0) + goto unlock; + + update_params.smac_index = new_smac_index; + if (mlx4_update_qp(ibdev->dev, &qp->mqp, MLX4_UPDATE_QP_SMAC, + &update_params)) { + release_mac = new_smac; + goto unlock; + } + + qp->pri.smac = new_smac; + qp->pri.smac_index = new_smac_index; + + release_mac = old_smac; + } + +unlock: + mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]); + if (release_mac != MLX4_IB_INVALID_MAC) + mlx4_unregister_mac(ibdev->dev, port, release_mac); +} + static void mlx4_ib_get_dev_addr(struct net_device *dev, struct mlx4_ib_dev *ibdev, u8 port) { @@ -1689,9 +1737,13 @@ static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev) return 0; } -static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev) +static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev, + struct net_device *dev, + unsigned long event) + { struct mlx4_ib_iboe *iboe; + int update_qps_port = -1; int port; iboe = &ibdev->iboe; @@ -1719,6 +1771,11 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev) } curr_master = iboe->masters[port - 1]; + if (dev == iboe->netdevs[port - 1] && + (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER || + event == NETDEV_UP || event == NETDEV_CHANGE)) + update_qps_port = port; + if (curr_netdev) { port_state = (netif_running(curr_netdev) && netif_carrier_ok(curr_netdev)) ? IB_PORT_ACTIVE : IB_PORT_DOWN; @@ -1752,6 +1809,9 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev) } spin_unlock(&iboe->lock); + + if (update_qps_port > 0) + mlx4_ib_update_qps(ibdev, dev, update_qps_port); } static int mlx4_ib_netdev_event(struct notifier_block *this, @@ -1764,7 +1824,7 @@ static int mlx4_ib_netdev_event(struct notifier_block *this, return NOTIFY_DONE; ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb); - mlx4_ib_scan_netdevs(ibdev); + mlx4_ib_scan_netdevs(ibdev, dev, event); return NOTIFY_DONE; } @@ -2043,6 +2103,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) goto err_map; for (i = 0; i < ibdev->num_ports; ++i) { + mutex_init(&ibdev->qp1_proxy_lock[i]); if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) == IB_LINK_LAYER_ETHERNET) { err = mlx4_counter_alloc(ibdev->dev, &ibdev->counters[i]); @@ -2126,7 +2187,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) for (i = 1 ; i <= ibdev->num_ports ; ++i) reset_gid_table(ibdev, i); rtnl_lock(); - mlx4_ib_scan_netdevs(ibdev); + mlx4_ib_scan_netdevs(ibdev, NULL, 0); rtnl_unlock(); mlx4_ib_init_gid_table(ibdev); } diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index f589522fddfd..66b0b7dbd9f4 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -522,6 +522,9 @@ struct mlx4_ib_dev { int steer_qpn_count; int steer_qpn_base; int steering_support; + struct mlx4_ib_qp *qp1_proxy[MLX4_MAX_PORTS]; + /* lock when destroying qp1_proxy and getting netdev events */ + struct mutex qp1_proxy_lock[MLX4_MAX_PORTS]; }; struct ib_event_work { diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 41308af4163c..dc57482ae7af 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1132,6 +1132,12 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp) if (is_qp0(dev, mqp)) mlx4_CLOSE_PORT(dev->dev, mqp->port); + if (dev->qp1_proxy[mqp->port - 1] == mqp) { + mutex_lock(&dev->qp1_proxy_lock[mqp->port - 1]); + dev->qp1_proxy[mqp->port - 1] = NULL; + mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]); + } + pd = get_pd(mqp); destroy_qp_common(dev, mqp, !!pd->ibpd.uobject); @@ -1646,6 +1652,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, err = handle_eth_ud_smac_index(dev, qp, (u8 *)attr->smac, context); if (err) return -EINVAL; + if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI) + dev->qp1_proxy[qp->port - 1] = qp; } } } From 6e14a5eeb158215881ef4507833a3574d0dbad19 Mon Sep 17 00:00:00 2001 From: Zhangfei Gao Date: Thu, 15 May 2014 13:35:34 +0800 Subject: [PATCH 106/178] net: phy: resume phydev when going to RESUMING With commit be9dad1f9f26604fb ("net: phy: suspend phydev when going to HALTED"), an unused PHY device will be put in a low-power mode using BMCR_PDOWN. Some Ethernet drivers might be calling phy_start() and phy_stop() from ndo_open and ndo_close() respectively, while calling phy_connect() and phy_disconnect() from probe and remove. In such a case, the PHY will be powered down during the phy_stop() call, but will fail to be powered up in phy_start(). This patch fixes this scenario. Signed-off-by: Jiancheng Xue Signed-off-by: Zhangfei Gao Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index a972056b2249..3bc079a67a3d 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -715,7 +715,7 @@ void phy_state_machine(struct work_struct *work) struct delayed_work *dwork = to_delayed_work(work); struct phy_device *phydev = container_of(dwork, struct phy_device, state_queue); - int needs_aneg = 0, do_suspend = 0; + bool needs_aneg = false, do_suspend = false, do_resume = false; int err = 0; mutex_lock(&phydev->lock); @@ -727,7 +727,7 @@ void phy_state_machine(struct work_struct *work) case PHY_PENDING: break; case PHY_UP: - needs_aneg = 1; + needs_aneg = true; phydev->link_timeout = PHY_AN_TIMEOUT; @@ -757,7 +757,7 @@ void phy_state_machine(struct work_struct *work) phydev->adjust_link(phydev->attached_dev); } else if (0 == phydev->link_timeout--) - needs_aneg = 1; + needs_aneg = true; break; case PHY_NOLINK: err = phy_read_status(phydev); @@ -791,7 +791,7 @@ void phy_state_machine(struct work_struct *work) netif_carrier_on(phydev->attached_dev); } else { if (0 == phydev->link_timeout--) - needs_aneg = 1; + needs_aneg = true; } phydev->adjust_link(phydev->attached_dev); @@ -827,7 +827,7 @@ void phy_state_machine(struct work_struct *work) phydev->link = 0; netif_carrier_off(phydev->attached_dev); phydev->adjust_link(phydev->attached_dev); - do_suspend = 1; + do_suspend = true; } break; case PHY_RESUMING: @@ -876,6 +876,7 @@ void phy_state_machine(struct work_struct *work) } phydev->adjust_link(phydev->attached_dev); } + do_resume = true; break; } @@ -883,9 +884,10 @@ void phy_state_machine(struct work_struct *work) if (needs_aneg) err = phy_start_aneg(phydev); - - if (do_suspend) + else if (do_suspend) phy_suspend(phydev); + else if (do_resume) + phy_resume(phydev); if (err < 0) phy_error(phydev); From fde0133b9cfa4e01b275e942ffc32fd78e27d27c Mon Sep 17 00:00:00 2001 From: Nathaniel W Filardo Date: Thu, 15 May 2014 15:51:22 +0100 Subject: [PATCH 107/178] af_rxrpc: Fix XDR length check in rxrpc key demarshalling. There may be padding on the ticket contained in the key payload, so just ensure that the claimed token length is large enough, rather than exactly the right size. Signed-off-by: Nathaniel Wesley Filardo Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/ar-key.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index 7633a752c65e..0ad080790a32 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -99,7 +99,7 @@ static int rxrpc_instantiate_xdr_rxkad(struct key *key, const __be32 *xdr, _debug("tktlen: %x", tktlen); if (tktlen > AFSTOKEN_RK_TIX_MAX) return -EKEYREJECTED; - if (8 * 4 + tktlen != toklen) + if (toklen < 8 * 4 + tktlen) return -EKEYREJECTED; plen = sizeof(*token) + sizeof(*token->kad) + tktlen; From 0d08fceb2e21c30ca3e1e462e678723f806acf18 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Fri, 16 May 2014 12:26:04 +0100 Subject: [PATCH 108/178] xen-netback: fix race between napi_complete() and interrupt handler When the NAPI budget was not all used, xenvif_poll() would call napi_complete() /after/ enabling the interrupt. This resulted in a race between the napi_complete() and the napi_schedule() in the interrupt handler. The use of local_irq_save/restore() avoided by race iff the handler is running on the same CPU but not if it was running on a different CPU. Fix this properly by calling napi_complete() before reenabling interrupts (in the xenvif_napi_schedule_or_enable_irq() call). Signed-off-by: David Vrabel Acked-by: Wei Liu Acked-by: Ian Campbell Signed-off-by: David S. Miller --- drivers/net/xen-netback/common.h | 2 +- drivers/net/xen-netback/interface.c | 30 +++-------------------------- drivers/net/xen-netback/netback.c | 4 ++-- 3 files changed, 6 insertions(+), 30 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 630a3fcf65bc..0d4a285cbd7e 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -226,7 +226,7 @@ int xenvif_map_frontend_rings(struct xenvif *vif, grant_ref_t rx_ring_ref); /* Check for SKBs from frontend and schedule backend processing */ -void xenvif_check_rx_xenvif(struct xenvif *vif); +void xenvif_napi_schedule_or_enable_events(struct xenvif *vif); /* Prevent the device from generating any further traffic. */ void xenvif_carrier_off(struct xenvif *vif); diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index ef05c5c49d41..20e9defa1060 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -75,32 +75,8 @@ static int xenvif_poll(struct napi_struct *napi, int budget) work_done = xenvif_tx_action(vif, budget); if (work_done < budget) { - int more_to_do = 0; - unsigned long flags; - - /* It is necessary to disable IRQ before calling - * RING_HAS_UNCONSUMED_REQUESTS. Otherwise we might - * lose event from the frontend. - * - * Consider: - * RING_HAS_UNCONSUMED_REQUESTS - * - * __napi_complete - * - * This handler is still in scheduled state so the - * event has no effect at all. After __napi_complete - * this handler is descheduled and cannot get - * scheduled again. We lose event in this case and the ring - * will be completely stalled. - */ - - local_irq_save(flags); - - RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do); - if (!more_to_do) - __napi_complete(napi); - - local_irq_restore(flags); + napi_complete(napi); + xenvif_napi_schedule_or_enable_events(vif); } return work_done; @@ -194,7 +170,7 @@ static void xenvif_up(struct xenvif *vif) enable_irq(vif->tx_irq); if (vif->tx_irq != vif->rx_irq) enable_irq(vif->rx_irq); - xenvif_check_rx_xenvif(vif); + xenvif_napi_schedule_or_enable_events(vif); } static void xenvif_down(struct xenvif *vif) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 64ab1d141f1c..7367208ee8cd 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -716,7 +716,7 @@ done: notify_remote_via_irq(vif->rx_irq); } -void xenvif_check_rx_xenvif(struct xenvif *vif) +void xenvif_napi_schedule_or_enable_events(struct xenvif *vif) { int more_to_do; @@ -750,7 +750,7 @@ static void tx_credit_callback(unsigned long data) { struct xenvif *vif = (struct xenvif *)data; tx_add_credit(vif); - xenvif_check_rx_xenvif(vif); + xenvif_napi_schedule_or_enable_events(vif); } static void xenvif_tx_err(struct xenvif *vif, From 2e47b291953c35afa4e20a65475954c1a1b9afe1 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Thu, 15 May 2014 16:38:41 -0700 Subject: [PATCH 109/178] net: ipv6: make "ip -6 route get mark xyz" work. Currently, "ip -6 route get mark xyz" ignores the mark passed in by userspace. Make it honour the mark, just like IPv4 does. Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv6/route.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 58c449ad7f6e..6ebdb7b6744c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2730,6 +2730,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh) if (tb[RTA_OIF]) oif = nla_get_u32(tb[RTA_OIF]); + if (tb[RTA_MARK]) + fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]); + if (iif) { struct net_device *dev; int flags = 0; From d1c0b471b340f43fa857d19150e029257d7bb475 Mon Sep 17 00:00:00 2001 From: Fabian Godehardt Date: Fri, 16 May 2014 06:21:44 +0200 Subject: [PATCH 110/178] net/dsa/dsa.c: increment chip_index during of_node handling on dsa_of_probe() Adding more than one chip on device-tree currently causes the probing routine to always use the first chips data pointer. Signed-off-by: Fabian Godehardt Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 0eb5d5e76dfb..5db37cef50a9 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -406,8 +406,9 @@ static int dsa_of_probe(struct platform_device *pdev) goto out_free; } - chip_index = 0; + chip_index = -1; for_each_available_child_of_node(np, child) { + chip_index++; cd = &pd->chip[chip_index]; cd->mii_bus = &mdio_bus->dev; From 22fb22eaebf4d16987f3fd9c3484c436ee0badf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Fri, 16 May 2014 08:34:39 +0300 Subject: [PATCH 111/178] ipv4: ip_tunnels: disable cache for nbma gre tunnels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The connected check fails to check for ip_gre nbma mode tunnels properly. ip_gre creates temporary tnl_params with daddr specified to pass-in the actual target on per-packet basis from neighbor layer. Detect these tunnels by inspecting the actual tunnel configuration. Minimal test case: ip route add 192.168.1.1/32 via 10.0.0.1 ip route add 192.168.1.2/32 via 10.0.0.2 ip tunnel add nbma0 mode gre key 1 tos c0 ip addr add 172.17.0.0/16 dev nbma0 ip link set nbma0 up ip neigh add 172.17.0.1 lladdr 192.168.1.1 dev nbma0 ip neigh add 172.17.0.2 lladdr 192.168.1.2 dev nbma0 ping 172.17.0.1 ping 172.17.0.2 The second ping should be going to 192.168.1.2 and head 10.0.0.2; but cached gre tunnel level route is used and it's actually going to 192.168.1.1 via 10.0.0.1. The lladdr's need to go to separate dst for the bug to trigger. Test case uses separate route entries, but this can also happen when the route entry is same: if there is a nexthop exception or the GRE tunnel is IPsec'ed in which case the dst points to xfrm bundle unique to the gre lladdr. Fixes: 7d442fab0a67 ("ipv4: Cache dst in tunnels") Signed-off-by: Timo Teräs Cc: Tom Herbert Cc: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index b3f859731c60..49721d78df75 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -540,9 +540,10 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, unsigned int max_headroom; /* The extra header space needed */ __be32 dst; int err; - bool connected = true; + bool connected; inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + connected = (tunnel->parms.iph.daddr != 0); dst = tnl_params->daddr; if (dst == 0) { From 29e98242783ed3ba569797846a606ba66f781625 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 16 May 2014 11:34:37 -0700 Subject: [PATCH 112/178] net: gro: make sure skb->cb[] initial content has not to be zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting from linux-3.13, GRO attempts to build full size skbs. Problem is the commit assumed one particular field in skb->cb[] was clean, but it is not the case on some stacked devices. Timo reported a crash in case traffic is decrypted before reaching a GRE device. Fix this by initializing NAPI_GRO_CB(skb)->last at the right place, this also removes one conditional. Thanks a lot to Timo for providing full reports and bisecting this. Fixes: 8a29111c7ca6 ("net: gro: allow to build full sized skb") Bisected-by: Timo Teras Signed-off-by: Eric Dumazet Tested-by: Timo Teräs Signed-off-by: David S. Miller --- net/core/dev.c | 1 + net/core/skbuff.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 6da649bde4f7..ed928e846559 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3951,6 +3951,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff } NAPI_GRO_CB(skb)->count = 1; NAPI_GRO_CB(skb)->age = jiffies; + NAPI_GRO_CB(skb)->last = skb; skb_shinfo(skb)->gso_size = skb_gro_len(skb); skb->next = napi->gro_list; napi->gro_list = skb; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1b62343f5837..8383b2bddeb9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3076,7 +3076,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) if (unlikely(p->len + len >= 65536)) return -E2BIG; - lp = NAPI_GRO_CB(p)->last ?: p; + lp = NAPI_GRO_CB(p)->last; pinfo = skb_shinfo(lp); if (headlen <= offset) { @@ -3192,7 +3192,7 @@ merge: __skb_pull(skb, offset); - if (!NAPI_GRO_CB(p)->last) + if (NAPI_GRO_CB(p)->last == p) skb_shinfo(p)->frag_list = skb; else NAPI_GRO_CB(p)->last->next = skb; From 4085ebe8c31face855fd01ee40372cb4aab1df3a Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 16 May 2014 17:04:53 -0400 Subject: [PATCH 113/178] net: Find the nesting level of a given device by type. Multiple devices in the kernel can be stacked/nested and they need to know their nesting level for the purposes of lockdep. This patch provides a generic function that determines a nesting level of a particular device by its type (ex: vlan, macvlan, etc). We only care about nesting of the same type of devices. For example: eth0 <- vlan0.10 <- macvlan0 <- vlan1.20 The nesting level of vlan1.20 would be 1, since there is another vlan in the stack under it. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 ++++++++ net/core/dev.c | 50 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 20e99efb1ca6..fb912e8e5c7f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3077,6 +3077,14 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev, priv; \ priv = netdev_lower_get_next_private_rcu(dev, &(iter))) +void *netdev_lower_get_next(struct net_device *dev, + struct list_head **iter); +#define netdev_for_each_lower_dev(dev, ldev, iter) \ + for (iter = &(dev)->adj_list.lower, \ + ldev = netdev_lower_get_next(dev, &(iter)); \ + ldev; \ + ldev = netdev_lower_get_next(dev, &(iter))) + void *netdev_adjacent_get_private(struct list_head *adj_list); void *netdev_lower_get_first_private_rcu(struct net_device *dev); struct net_device *netdev_master_upper_dev_get(struct net_device *dev); @@ -3092,6 +3100,8 @@ void netdev_upper_dev_unlink(struct net_device *dev, void netdev_adjacent_rename_links(struct net_device *dev, char *oldname); void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev); +int dev_get_nest_level(struct net_device *dev, + bool (*type_check)(struct net_device *dev)); int skb_checksum_help(struct sk_buff *skb); struct sk_buff *__skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path); diff --git a/net/core/dev.c b/net/core/dev.c index ed928e846559..6ee3ac25ed72 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4622,6 +4622,32 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev, } EXPORT_SYMBOL(netdev_lower_get_next_private_rcu); +/** + * netdev_lower_get_next - Get the next device from the lower neighbour + * list + * @dev: device + * @iter: list_head ** of the current position + * + * Gets the next netdev_adjacent from the dev's lower neighbour + * list, starting from iter position. The caller must hold RTNL lock or + * its own locking that guarantees that the neighbour lower + * list will remain unchainged. + */ +void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter) +{ + struct netdev_adjacent *lower; + + lower = list_entry((*iter)->next, struct netdev_adjacent, list); + + if (&lower->list == &dev->adj_list.lower) + return NULL; + + *iter = &lower->list; + + return lower->dev; +} +EXPORT_SYMBOL(netdev_lower_get_next); + /** * netdev_lower_get_first_private_rcu - Get the first ->private from the * lower neighbour list, RCU @@ -5072,6 +5098,30 @@ void *netdev_lower_dev_get_private(struct net_device *dev, } EXPORT_SYMBOL(netdev_lower_dev_get_private); + +int dev_get_nest_level(struct net_device *dev, + bool (*type_check)(struct net_device *dev)) +{ + struct net_device *lower = NULL; + struct list_head *iter; + int max_nest = -1; + int nest; + + ASSERT_RTNL(); + + netdev_for_each_lower_dev(dev, lower, iter) { + nest = dev_get_nest_level(lower, type_check); + if (max_nest < nest) + max_nest = nest; + } + + if (type_check(dev)) + max_nest++; + + return max_nest; +} +EXPORT_SYMBOL(dev_get_nest_level); + static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; From 25175ba5c9bff9aaf0229df34bb5d54c81633ec3 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 16 May 2014 17:04:54 -0400 Subject: [PATCH 114/178] net: Allow for more then a single subclass for netif_addr_lock Currently netif_addr_lock_nested assumes that there can be only a single nesting level between 2 devices. However, if we have multiple devices of the same type stacked, this fails. For example: eth0 <-- vlan0.10 <-- vlan0.10.20 A more complicated configuration may stack more then one type of device in different order. Ex: eth0 <-- vlan0.10 <-- macvlan0 <-- vlan1.10.20 <-- macvlan1 This patch adds an ndo_* function that allows each stackable device to report its nesting level. If the device doesn't provide this function default subclass of 1 is used. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fb912e8e5c7f..9d4b1f1b6b75 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1144,6 +1144,7 @@ struct net_device_ops { netdev_tx_t (*ndo_dfwd_start_xmit) (struct sk_buff *skb, struct net_device *dev, void *priv); + int (*ndo_get_lock_subclass)(struct net_device *dev); }; /** @@ -2950,7 +2951,12 @@ static inline void netif_addr_lock(struct net_device *dev) static inline void netif_addr_lock_nested(struct net_device *dev) { - spin_lock_nested(&dev->addr_list_lock, SINGLE_DEPTH_NESTING); + int subclass = SINGLE_DEPTH_NESTING; + + if (dev->netdev_ops->ndo_get_lock_subclass) + subclass = dev->netdev_ops->ndo_get_lock_subclass(dev); + + spin_lock_nested(&dev->addr_list_lock, subclass); } static inline void netif_addr_lock_bh(struct net_device *dev) From d38569ab2bba6e6b3233acfc3a84cdbcfbd1f79f Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 16 May 2014 17:04:55 -0400 Subject: [PATCH 115/178] vlan: Fix lockdep warning with stacked vlan devices. This reverts commit dc8eaaa006350d24030502a4521542e74b5cb39f. vlan: Fix lockdep warning when vlan dev handle notification Instead we use the new new API to find the lock subclass of our vlan device. This way we can support configurations where vlans are interspersed with other devices: bond -> vlan -> macvlan -> vlan Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 3 ++- net/8021q/vlan.c | 1 + net/8021q/vlan_dev.c | 52 +++++++---------------------------------- net/core/dev.c | 1 - 4 files changed, 12 insertions(+), 45 deletions(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 13bbbde00e68..724bde8477b2 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -73,7 +73,7 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb) /* found in socket.c */ extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *)); -static inline int is_vlan_dev(struct net_device *dev) +static inline bool is_vlan_dev(struct net_device *dev) { return dev->priv_flags & IFF_802_1Q_VLAN; } @@ -159,6 +159,7 @@ struct vlan_dev_priv { #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *netpoll; #endif + unsigned int nest_level; }; static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 175273f38cb1..44ebd5c2cd4a 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -169,6 +169,7 @@ int register_vlan_dev(struct net_device *dev) if (err < 0) goto out_uninit_mvrp; + vlan->nest_level = dev_get_nest_level(real_dev, is_vlan_dev) + 1; err = register_netdevice(dev); if (err < 0) goto out_uninit_mvrp; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 733ec283ed1b..019efb79708f 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -493,48 +493,10 @@ static void vlan_dev_change_rx_flags(struct net_device *dev, int change) } } -static int vlan_calculate_locking_subclass(struct net_device *real_dev) -{ - int subclass = 0; - - while (is_vlan_dev(real_dev)) { - subclass++; - real_dev = vlan_dev_priv(real_dev)->real_dev; - } - - return subclass; -} - -static void vlan_dev_mc_sync(struct net_device *to, struct net_device *from) -{ - int err = 0, subclass; - - subclass = vlan_calculate_locking_subclass(to); - - spin_lock_nested(&to->addr_list_lock, subclass); - err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); - if (!err) - __dev_set_rx_mode(to); - spin_unlock(&to->addr_list_lock); -} - -static void vlan_dev_uc_sync(struct net_device *to, struct net_device *from) -{ - int err = 0, subclass; - - subclass = vlan_calculate_locking_subclass(to); - - spin_lock_nested(&to->addr_list_lock, subclass); - err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); - if (!err) - __dev_set_rx_mode(to); - spin_unlock(&to->addr_list_lock); -} - static void vlan_dev_set_rx_mode(struct net_device *vlan_dev) { - vlan_dev_mc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); - vlan_dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); + dev_mc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); + dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); } /* @@ -562,6 +524,11 @@ static void vlan_dev_set_lockdep_class(struct net_device *dev, int subclass) netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, &subclass); } +static int vlan_dev_get_lock_subclass(struct net_device *dev) +{ + return vlan_dev_priv(dev)->nest_level; +} + static const struct header_ops vlan_header_ops = { .create = vlan_dev_hard_header, .rebuild = vlan_dev_rebuild_header, @@ -597,7 +564,6 @@ static const struct net_device_ops vlan_netdev_ops; static int vlan_dev_init(struct net_device *dev) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; - int subclass = 0; netif_carrier_off(dev); @@ -646,8 +612,7 @@ static int vlan_dev_init(struct net_device *dev) SET_NETDEV_DEVTYPE(dev, &vlan_type); - subclass = vlan_calculate_locking_subclass(dev); - vlan_dev_set_lockdep_class(dev, subclass); + vlan_dev_set_lockdep_class(dev, vlan_dev_get_lock_subclass(dev)); vlan_dev_priv(dev)->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats); if (!vlan_dev_priv(dev)->vlan_pcpu_stats) @@ -819,6 +784,7 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_netpoll_cleanup = vlan_dev_netpoll_cleanup, #endif .ndo_fix_features = vlan_dev_fix_features, + .ndo_get_lock_subclass = vlan_dev_get_lock_subclass, }; void vlan_setup(struct net_device *dev) diff --git a/net/core/dev.c b/net/core/dev.c index 6ee3ac25ed72..2b872bfbd172 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5287,7 +5287,6 @@ void __dev_set_rx_mode(struct net_device *dev) if (ops->ndo_set_rx_mode) ops->ndo_set_rx_mode(dev); } -EXPORT_SYMBOL(__dev_set_rx_mode); void dev_set_rx_mode(struct net_device *dev) { From c674ac30c549596295eb0a5af7f4714c0b905b6f Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 16 May 2014 17:04:56 -0400 Subject: [PATCH 116/178] macvlan: Fix lockdep warnings with stacked macvlan devices Macvlan devices try to avoid stacking, but that's not always successfull or even desired. As an example, the following configuration is perefectly legal and valid: eth0 <--- macvlan0 <---- vlan0.10 <--- macvlan1 However, this configuration produces the following lockdep trace: [ 115.620418] ====================================================== [ 115.620477] [ INFO: possible circular locking dependency detected ] [ 115.620516] 3.15.0-rc1+ #24 Not tainted [ 115.620540] ------------------------------------------------------- [ 115.620577] ip/1704 is trying to acquire lock: [ 115.620604] (&vlan_netdev_addr_lock_key/1){+.....}, at: [] dev_uc_sync+0x3c/0x80 [ 115.620686] but task is already holding lock: [ 115.620723] (&macvlan_netdev_addr_lock_key){+.....}, at: [] dev_set_rx_mode+0x1e/0x40 [ 115.620795] which lock already depends on the new lock. [ 115.620853] the existing dependency chain (in reverse order) is: [ 115.620894] -> #1 (&macvlan_netdev_addr_lock_key){+.....}: [ 115.620935] [] lock_acquire+0xa2/0x130 [ 115.620974] [] _raw_spin_lock_nested+0x37/0x50 [ 115.621019] [] vlan_dev_set_rx_mode+0x53/0x110 [8021q] [ 115.621066] [] __dev_set_rx_mode+0x57/0xa0 [ 115.621105] [] dev_set_rx_mode+0x26/0x40 [ 115.621143] [] __dev_open+0xde/0x140 [ 115.621174] [] __dev_change_flags+0x9d/0x170 [ 115.621174] [] dev_change_flags+0x29/0x60 [ 115.621174] [] do_setlink+0x321/0x9a0 [ 115.621174] [] rtnl_newlink+0x51f/0x730 [ 115.621174] [] rtnetlink_rcv_msg+0x95/0x250 [ 115.621174] [] netlink_rcv_skb+0xa9/0xc0 [ 115.621174] [] rtnetlink_rcv+0x2a/0x40 [ 115.621174] [] netlink_unicast+0xf0/0x1c0 [ 115.621174] [] netlink_sendmsg+0x2ff/0x740 [ 115.621174] [] sock_sendmsg+0x8b/0xc0 [ 115.621174] [] ___sys_sendmsg+0x369/0x380 [ 115.621174] [] __sys_sendmsg+0x42/0x80 [ 115.621174] [] SyS_sendmsg+0x12/0x20 [ 115.621174] [] system_call_fastpath+0x16/0x1b [ 115.621174] -> #0 (&vlan_netdev_addr_lock_key/1){+.....}: [ 115.621174] [] __lock_acquire+0x1773/0x1a60 [ 115.621174] [] lock_acquire+0xa2/0x130 [ 115.621174] [] _raw_spin_lock_nested+0x37/0x50 [ 115.621174] [] dev_uc_sync+0x3c/0x80 [ 115.621174] [] macvlan_set_mac_lists+0xca/0x110 [macvlan] [ 115.621174] [] __dev_set_rx_mode+0x57/0xa0 [ 115.621174] [] dev_set_rx_mode+0x26/0x40 [ 115.621174] [] __dev_open+0xde/0x140 [ 115.621174] [] __dev_change_flags+0x9d/0x170 [ 115.621174] [] dev_change_flags+0x29/0x60 [ 115.621174] [] do_setlink+0x321/0x9a0 [ 115.621174] [] rtnl_newlink+0x51f/0x730 [ 115.621174] [] rtnetlink_rcv_msg+0x95/0x250 [ 115.621174] [] netlink_rcv_skb+0xa9/0xc0 [ 115.621174] [] rtnetlink_rcv+0x2a/0x40 [ 115.621174] [] netlink_unicast+0xf0/0x1c0 [ 115.621174] [] netlink_sendmsg+0x2ff/0x740 [ 115.621174] [] sock_sendmsg+0x8b/0xc0 [ 115.621174] [] ___sys_sendmsg+0x369/0x380 [ 115.621174] [] __sys_sendmsg+0x42/0x80 [ 115.621174] [] SyS_sendmsg+0x12/0x20 [ 115.621174] [] system_call_fastpath+0x16/0x1b [ 115.621174] other info that might help us debug this: [ 115.621174] Possible unsafe locking scenario: [ 115.621174] CPU0 CPU1 [ 115.621174] ---- ---- [ 115.621174] lock(&macvlan_netdev_addr_lock_key); [ 115.621174] lock(&vlan_netdev_addr_lock_key/1); [ 115.621174] lock(&macvlan_netdev_addr_lock_key); [ 115.621174] lock(&vlan_netdev_addr_lock_key/1); [ 115.621174] *** DEADLOCK *** [ 115.621174] 2 locks held by ip/1704: [ 115.621174] #0: (rtnl_mutex){+.+.+.}, at: [] rtnetlink_rcv+0x1b/0x40 [ 115.621174] #1: (&macvlan_netdev_addr_lock_key){+.....}, at: [] dev_set_rx_mode+0x1e/0x40 [ 115.621174] stack backtrace: [ 115.621174] CPU: 3 PID: 1704 Comm: ip Not tainted 3.15.0-rc1+ #24 [ 115.621174] Hardware name: Hewlett-Packard HP xw8400 Workstation/0A08h, BIOS 786D5 v02.38 10/25/2010 [ 115.621174] ffffffff82339ae0 ffff880465f79568 ffffffff816ee20c ffffffff82339ae0 [ 115.621174] ffff880465f795a8 ffffffff816e9e1b ffff880465f79600 ffff880465b019c8 [ 115.621174] 0000000000000001 0000000000000002 ffff880465b019c8 ffff880465b01230 [ 115.621174] Call Trace: [ 115.621174] [] dump_stack+0x4d/0x66 [ 115.621174] [] print_circular_bug+0x200/0x20e [ 115.621174] [] __lock_acquire+0x1773/0x1a60 [ 115.621174] [] ? trace_hardirqs_on_caller+0xb2/0x1d0 [ 115.621174] [] lock_acquire+0xa2/0x130 [ 115.621174] [] ? dev_uc_sync+0x3c/0x80 [ 115.621174] [] _raw_spin_lock_nested+0x37/0x50 [ 115.621174] [] ? dev_uc_sync+0x3c/0x80 [ 115.621174] [] dev_uc_sync+0x3c/0x80 [ 115.621174] [] macvlan_set_mac_lists+0xca/0x110 [macvlan] [ 115.621174] [] __dev_set_rx_mode+0x57/0xa0 [ 115.621174] [] dev_set_rx_mode+0x26/0x40 [ 115.621174] [] __dev_open+0xde/0x140 [ 115.621174] [] __dev_change_flags+0x9d/0x170 [ 115.621174] [] dev_change_flags+0x29/0x60 [ 115.621174] [] ? mem_cgroup_bad_page_check+0x21/0x30 [ 115.621174] [] do_setlink+0x321/0x9a0 [ 115.621174] [] ? __lock_acquire+0x37c/0x1a60 [ 115.621174] [] rtnl_newlink+0x51f/0x730 [ 115.621174] [] ? rtnl_newlink+0xe9/0x730 [ 115.621174] [] rtnetlink_rcv_msg+0x95/0x250 [ 115.621174] [] ? trace_hardirqs_on+0xd/0x10 [ 115.621174] [] ? rtnetlink_rcv+0x1b/0x40 [ 115.621174] [] ? rtnetlink_rcv+0x40/0x40 [ 115.621174] [] netlink_rcv_skb+0xa9/0xc0 [ 115.621174] [] rtnetlink_rcv+0x2a/0x40 [ 115.621174] [] netlink_unicast+0xf0/0x1c0 [ 115.621174] [] netlink_sendmsg+0x2ff/0x740 [ 115.621174] [] sock_sendmsg+0x8b/0xc0 [ 115.621174] [] ? might_fault+0x5f/0xb0 [ 115.621174] [] ? might_fault+0xa8/0xb0 [ 115.621174] [] ? might_fault+0x5f/0xb0 [ 115.621174] [] ? verify_iovec+0x5e/0xe0 [ 115.621174] [] ___sys_sendmsg+0x369/0x380 [ 115.621174] [] ? __do_page_fault+0x11d/0x570 [ 115.621174] [] ? up_read+0x1f/0x40 [ 115.621174] [] ? __do_page_fault+0x214/0x570 [ 115.621174] [] ? mntput_no_expire+0x6b/0x1c0 [ 115.621174] [] ? mntput_no_expire+0x17/0x1c0 [ 115.621174] [] ? mntput+0x24/0x40 [ 115.621174] [] __sys_sendmsg+0x42/0x80 [ 115.621174] [] SyS_sendmsg+0x12/0x20 [ 115.621174] [] system_call_fastpath+0x16/0x1b Fix this by correctly providing macvlan lockdep class. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 12 ++++++++++-- include/linux/if_macvlan.h | 1 + 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index c5fb9cf95c12..d53e299ae1d9 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -517,6 +517,11 @@ static struct lock_class_key macvlan_netdev_addr_lock_key; #define MACVLAN_STATE_MASK \ ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT)) +static int macvlan_get_nest_level(struct net_device *dev) +{ + return ((struct macvlan_dev *)netdev_priv(dev))->nest_level; +} + static void macvlan_set_lockdep_class_one(struct net_device *dev, struct netdev_queue *txq, void *_unused) @@ -527,8 +532,9 @@ static void macvlan_set_lockdep_class_one(struct net_device *dev, static void macvlan_set_lockdep_class(struct net_device *dev) { - lockdep_set_class(&dev->addr_list_lock, - &macvlan_netdev_addr_lock_key); + lockdep_set_class_and_subclass(&dev->addr_list_lock, + &macvlan_netdev_addr_lock_key, + macvlan_get_nest_level(dev)); netdev_for_each_tx_queue(dev, macvlan_set_lockdep_class_one, NULL); } @@ -723,6 +729,7 @@ static const struct net_device_ops macvlan_netdev_ops = { .ndo_fdb_add = macvlan_fdb_add, .ndo_fdb_del = macvlan_fdb_del, .ndo_fdb_dump = ndo_dflt_fdb_dump, + .ndo_get_lock_subclass = macvlan_get_nest_level, }; void macvlan_common_setup(struct net_device *dev) @@ -851,6 +858,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, vlan->dev = dev; vlan->port = port; vlan->set_features = MACVLAN_FEATURES; + vlan->nest_level = dev_get_nest_level(lowerdev, netif_is_macvlan) + 1; vlan->mode = MACVLAN_MODE_VEPA; if (data && data[IFLA_MACVLAN_MODE]) diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 7c8b20b120ea..a9a53b12397b 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -56,6 +56,7 @@ struct macvlan_dev { int numqueues; netdev_features_t tap_features; int minor; + int nest_level; }; static inline void macvlan_count_rx(const struct macvlan_dev *vlan, From 44a4085538c844e79d6ee6bcf46fabf7c57a9a38 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 16 May 2014 17:20:38 -0400 Subject: [PATCH 117/178] bonding: Fix stacked device detection in arp monitoring Prior to commit fbd929f2dce460456807a51e18d623db3db9f077 bonding: support QinQ for bond arp interval the arp monitoring code allowed for proper detection of devices stacked on top of vlans. Since the above commit, the code can still detect a device stacked on top of single vlan, but not a device stacked on top of Q-in-Q configuration. The search will only set the inner vlan tag if the route device is the vlan device. However, this is not always the case, as it is possible to extend the stacked configuration. With this patch it is possible to provision devices on top Q-in-Q vlan configuration that should be used as a source of ARP monitoring information. For example: ip link add link bond0 vlan10 type vlan proto 802.1q id 10 ip link add link vlan10 vlan100 type vlan proto 802.1q id 100 ip link add link vlan100 type macvlan Note: This patch limites the number of stacked VLANs to 2, just like before. The original, however had another issue in that if we had more then 2 levels of VLANs, we would end up generating incorrectly tagged traffic. This is no longer possible. Fixes: fbd929f2dce460456807a51e18d623db3db9f077 (bonding: support QinQ for bond arp interval) CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: Ding Tianhong CC: Patric McHardy Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 134 ++++++++++++++++---------------- drivers/net/bonding/bonding.h | 1 + include/linux/if_vlan.h | 6 ++ include/linux/netdevice.h | 9 +++ net/core/dev.c | 26 +++++++ 5 files changed, 107 insertions(+), 69 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 69aff72c8957..d3a67896d435 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2126,10 +2126,10 @@ static bool bond_has_this_ip(struct bonding *bond, __be32 ip) */ static void bond_arp_send(struct net_device *slave_dev, int arp_op, __be32 dest_ip, __be32 src_ip, - struct bond_vlan_tag *inner, - struct bond_vlan_tag *outer) + struct bond_vlan_tag *tags) { struct sk_buff *skb; + int i; pr_debug("arp %d on slave %s: dst %pI4 src %pI4\n", arp_op, slave_dev->name, &dest_ip, &src_ip); @@ -2141,21 +2141,26 @@ static void bond_arp_send(struct net_device *slave_dev, int arp_op, net_err_ratelimited("ARP packet allocation failed\n"); return; } - if (outer->vlan_id) { - if (inner->vlan_id) { - pr_debug("inner tag: proto %X vid %X\n", - ntohs(inner->vlan_proto), inner->vlan_id); - skb = __vlan_put_tag(skb, inner->vlan_proto, - inner->vlan_id); - if (!skb) { - net_err_ratelimited("failed to insert inner VLAN tag\n"); - return; - } - } - pr_debug("outer reg: proto %X vid %X\n", - ntohs(outer->vlan_proto), outer->vlan_id); - skb = vlan_put_tag(skb, outer->vlan_proto, outer->vlan_id); + /* Go through all the tags backwards and add them to the packet */ + for (i = BOND_MAX_VLAN_ENCAP - 1; i > 0; i--) { + if (!tags[i].vlan_id) + continue; + + pr_debug("inner tag: proto %X vid %X\n", + ntohs(tags[i].vlan_proto), tags[i].vlan_id); + skb = __vlan_put_tag(skb, tags[i].vlan_proto, + tags[i].vlan_id); + if (!skb) { + net_err_ratelimited("failed to insert inner VLAN tag\n"); + return; + } + } + /* Set the outer tag */ + if (tags[0].vlan_id) { + pr_debug("outer tag: proto %X vid %X\n", + ntohs(tags[0].vlan_proto), tags[0].vlan_id); + skb = vlan_put_tag(skb, tags[0].vlan_proto, tags[0].vlan_id); if (!skb) { net_err_ratelimited("failed to insert outer VLAN tag\n"); return; @@ -2164,22 +2169,52 @@ static void bond_arp_send(struct net_device *slave_dev, int arp_op, arp_xmit(skb); } +/* Validate the device path between the @start_dev and the @end_dev. + * The path is valid if the @end_dev is reachable through device + * stacking. + * When the path is validated, collect any vlan information in the + * path. + */ +static bool bond_verify_device_path(struct net_device *start_dev, + struct net_device *end_dev, + struct bond_vlan_tag *tags) +{ + struct net_device *upper; + struct list_head *iter; + int idx; + + if (start_dev == end_dev) + return true; + + netdev_for_each_upper_dev_rcu(start_dev, upper, iter) { + if (bond_verify_device_path(upper, end_dev, tags)) { + if (is_vlan_dev(upper)) { + idx = vlan_get_encap_level(upper); + if (idx >= BOND_MAX_VLAN_ENCAP) + return false; + + tags[idx].vlan_proto = + vlan_dev_vlan_proto(upper); + tags[idx].vlan_id = vlan_dev_vlan_id(upper); + } + return true; + } + } + + return false; +} static void bond_arp_send_all(struct bonding *bond, struct slave *slave) { - struct net_device *upper, *vlan_upper; - struct list_head *iter, *vlan_iter; struct rtable *rt; - struct bond_vlan_tag inner, outer; + struct bond_vlan_tag tags[BOND_MAX_VLAN_ENCAP]; __be32 *targets = bond->params.arp_targets, addr; int i; + bool ret; for (i = 0; i < BOND_MAX_ARP_TARGETS && targets[i]; i++) { pr_debug("basa: target %pI4\n", &targets[i]); - inner.vlan_proto = 0; - inner.vlan_id = 0; - outer.vlan_proto = 0; - outer.vlan_id = 0; + memset(tags, 0, sizeof(tags)); /* Find out through which dev should the packet go */ rt = ip_route_output(dev_net(bond->dev), targets[i], 0, @@ -2192,7 +2227,8 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave) net_warn_ratelimited("%s: no route to arp_ip_target %pI4 and arp_validate is set\n", bond->dev->name, &targets[i]); - bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 0, &inner, &outer); + bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], + 0, tags); continue; } @@ -2201,52 +2237,12 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave) goto found; rcu_read_lock(); - /* first we search only for vlan devices. for every vlan - * found we verify its upper dev list, searching for the - * rt->dst.dev. If found we save the tag of the vlan and - * proceed to send the packet. - */ - netdev_for_each_all_upper_dev_rcu(bond->dev, vlan_upper, - vlan_iter) { - if (!is_vlan_dev(vlan_upper)) - continue; - - if (vlan_upper == rt->dst.dev) { - outer.vlan_proto = vlan_dev_vlan_proto(vlan_upper); - outer.vlan_id = vlan_dev_vlan_id(vlan_upper); - rcu_read_unlock(); - goto found; - } - netdev_for_each_all_upper_dev_rcu(vlan_upper, upper, - iter) { - if (upper == rt->dst.dev) { - /* If the upper dev is a vlan dev too, - * set the vlan tag to inner tag. - */ - if (is_vlan_dev(upper)) { - inner.vlan_proto = vlan_dev_vlan_proto(upper); - inner.vlan_id = vlan_dev_vlan_id(upper); - } - outer.vlan_proto = vlan_dev_vlan_proto(vlan_upper); - outer.vlan_id = vlan_dev_vlan_id(vlan_upper); - rcu_read_unlock(); - goto found; - } - } - } - - /* if the device we're looking for is not on top of any of - * our upper vlans, then just search for any dev that - * matches, and in case it's a vlan - save the id - */ - netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) { - if (upper == rt->dst.dev) { - rcu_read_unlock(); - goto found; - } - } + ret = bond_verify_device_path(bond->dev, rt->dst.dev, tags); rcu_read_unlock(); + if (ret) + goto found; + /* Not our device - skip */ pr_debug("%s: no path to arp_ip_target %pI4 via rt.dev %s\n", bond->dev->name, &targets[i], @@ -2259,7 +2255,7 @@ found: addr = bond_confirm_addr(rt->dst.dev, targets[i], 0); ip_rt_put(rt); bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], - addr, &inner, &outer); + addr, tags); } } diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index b8bdd0acc8f3..00bea320e3b5 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -36,6 +36,7 @@ #define bond_version DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n" +#define BOND_MAX_VLAN_ENCAP 2 #define BOND_MAX_ARP_TARGETS 16 #define BOND_DEFAULT_MIIMON 100 diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 724bde8477b2..c901b13b6f03 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -484,4 +484,10 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, */ skb->protocol = htons(ETH_P_802_2); } + +static inline int vlan_get_encap_level(struct net_device *dev) +{ + BUG_ON(!is_vlan_dev(dev)); + return vlan_dev_priv(dev)->nest_level; +} #endif /* !(_LINUX_IF_VLAN_H_) */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9d4b1f1b6b75..b42d07b0390b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3056,9 +3056,18 @@ extern int weight_p; extern int bpf_jit_enable; bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev); +struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, + struct list_head **iter); struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, struct list_head **iter); +/* iterate through upper list, must be called under RCU read lock */ +#define netdev_for_each_upper_dev_rcu(dev, updev, iter) \ + for (iter = &(dev)->adj_list.upper, \ + updev = netdev_upper_get_next_dev_rcu(dev, &(iter)); \ + updev; \ + updev = netdev_upper_get_next_dev_rcu(dev, &(iter))) + /* iterate through upper list, must be called under RCU read lock */ #define netdev_for_each_all_upper_dev_rcu(dev, updev, iter) \ for (iter = &(dev)->all_adj_list.upper, \ diff --git a/net/core/dev.c b/net/core/dev.c index 2b872bfbd172..9abc503b19b7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4541,6 +4541,32 @@ void *netdev_adjacent_get_private(struct list_head *adj_list) } EXPORT_SYMBOL(netdev_adjacent_get_private); +/** + * netdev_upper_get_next_dev_rcu - Get the next dev from upper list + * @dev: device + * @iter: list_head ** of the current position + * + * Gets the next device from the dev's upper list, starting from iter + * position. The caller must hold RCU read lock. + */ +struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, + struct list_head **iter) +{ + struct netdev_adjacent *upper; + + WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held()); + + upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); + + if (&upper->list == &dev->adj_list.upper) + return NULL; + + *iter = &upper->list; + + return upper->dev; +} +EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu); + /** * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list * @dev: device From f60c3704e87d39356d00c71bf51e55c2c55ad4f5 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 16 May 2014 17:20:39 -0400 Subject: [PATCH 118/178] bonding: Fix alb mode to only use first level vlans. ALB/TLB learning packets use all vlans configured on top of the bond. This ends up being incorrect if we have a stack of vlans on top of the bond. ALB/TLB should only use first level/outer most vlans in its announcements. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- drivers/net/bonding/bond_alb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 9f69e818b000..e53847884319 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1045,7 +1045,7 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]) /* loop through vlans and send one packet for each */ rcu_read_lock(); netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) { - if (upper->priv_flags & IFF_802_1Q_VLAN) + if (is_vlan_dev(upper) && vlan_get_encap_level(upper) == 0) alb_send_lp_vid(slave, mac_addr, vlan_dev_vlan_id(upper)); } From 5aa4ecfd0ddb1e6dcd1c886e6c49677550f581aa Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 17 May 2014 11:28:05 -0700 Subject: [PATCH 119/178] sparc64: Add membar to Niagara2 memcpy code. This is the prevent previous stores from overlapping the block stores done by the memcpy loop. Based upon a glibc patch by Jose E. Marchesi Signed-off-by: David S. Miller --- arch/sparc/lib/NG2memcpy.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S index 2c20ad63ddbf..30eee6e8a81b 100644 --- a/arch/sparc/lib/NG2memcpy.S +++ b/arch/sparc/lib/NG2memcpy.S @@ -236,6 +236,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ */ VISEntryHalf + membar #Sync alignaddr %o1, %g0, %g0 add %o1, (64 - 1), %o4 From 02948344fbf511c6eec28687dc76bb64b758ff93 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 15 May 2014 23:51:43 -0700 Subject: [PATCH 120/178] bnx2x: Convert return 0 to return rc These "return 0;" uses seem wrong as there are rc variables where error return values are set but unused. Signed-off-by: Joe Perches Acked-by: Dmitry Kravkov Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 2 +- drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 81cc2d9831c2..b8078d50261b 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -2695,7 +2695,7 @@ out: bnx2x_unlock_vf_pf_channel(bp, vf, CHANNEL_TLV_PF_SET_MAC); } - return 0; + return rc; } int bnx2x_set_vf_vlan(struct net_device *dev, int vfidx, u16 vlan, u8 qos) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index 0c067e8564dd..784c7155b98a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -747,7 +747,7 @@ int bnx2x_vfpf_config_mac(struct bnx2x *bp, u8 *addr, u8 vf_qid, bool set) out: bnx2x_vfpf_finalize(bp, &req->first_tlv); - return 0; + return rc; } /* request pf to config rss table for vf queues*/ From 4f337ed5c014752b000f593182fdcfa3ecf0d166 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 16 May 2014 15:57:59 -0700 Subject: [PATCH 121/178] MAINTAINERS: Pravin Shelar is Open vSwitch maintainer. Pravin will be maintaining Open vSwitch going forward. CC: Pravin Shelar Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7578deb8ff20..0eb0adb16f1e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6511,10 +6511,10 @@ T: git git://openrisc.net/~jonas/linux F: arch/openrisc/ OPENVSWITCH -M: Jesse Gross +M: Pravin Shelar L: dev@openvswitch.org W: http://openvswitch.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jesse/openvswitch.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/pshelar/openvswitch.git S: Maintained F: net/openvswitch/ From 524369e2391f4b422d0efdd11d526a373a11a43a Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 6 May 2014 19:45:38 +0200 Subject: [PATCH 122/178] can: c_can: remove obsolete STRICT_FRAME_ORDERING Kconfig option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In 2b9aecdce2 ("can: c_can: Disable rx split as workaround") a new Kconfig option was introduced as a workaround. The tests performed by Alexander Stein confirmed this option to be obsolete with all the other cleanups and fixes that had been discussed that time: http://marc.info/?l=linux-can&m=139746476821294&w=2 Both (author and tester) agreed to remove this Kconfig option again: http://marc.info/?l=linux-can&m=139883820714229&w=2 As some more cleanups took place since then a simple revert is not possible. This patch removes the entire option as it would behave when disabled. Further beautification’s can be done later. Signed-off-by: Oliver Hartkopp Tested-by: Alexander Stein Signed-off-by: Marc Kleine-Budde --- drivers/net/can/c_can/Kconfig | 7 ------- drivers/net/can/c_can/c_can.c | 36 ----------------------------------- 2 files changed, 43 deletions(-) diff --git a/drivers/net/can/c_can/Kconfig b/drivers/net/can/c_can/Kconfig index 8ab7103d4f44..61ffc12d8fd8 100644 --- a/drivers/net/can/c_can/Kconfig +++ b/drivers/net/can/c_can/Kconfig @@ -14,13 +14,6 @@ config CAN_C_CAN_PLATFORM SPEAr1310 and SPEAr320 evaluation boards & TI (www.ti.com) boards like am335x, dm814x, dm813x and dm811x. -config CAN_C_CAN_STRICT_FRAME_ORDERING - bool "Force a strict RX CAN frame order (may cause frame loss)" - ---help--- - The RX split buffer prevents packet reordering but can cause packet - loss. Only enable this option when you accept to lose CAN frames - in favour of getting the received CAN frames in the correct order. - config CAN_C_CAN_PCI tristate "Generic PCI Bus based C_CAN/D_CAN driver" depends on PCI diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index a2ca820b5373..95e04e2002da 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -732,26 +732,12 @@ static u32 c_can_adjust_pending(u32 pend) static inline void c_can_rx_object_get(struct net_device *dev, struct c_can_priv *priv, u32 obj) { -#ifdef CONFIG_CAN_C_CAN_STRICT_FRAME_ORDERING - if (obj < C_CAN_MSG_RX_LOW_LAST) - c_can_object_get(dev, IF_RX, obj, IF_COMM_RCV_LOW); - else -#endif c_can_object_get(dev, IF_RX, obj, priv->comm_rcv_high); } static inline void c_can_rx_finalize(struct net_device *dev, struct c_can_priv *priv, u32 obj) { -#ifdef CONFIG_CAN_C_CAN_STRICT_FRAME_ORDERING - if (obj < C_CAN_MSG_RX_LOW_LAST) - priv->rxmasked |= BIT(obj - 1); - else if (obj == C_CAN_MSG_RX_LOW_LAST) { - priv->rxmasked = 0; - /* activate all lower message objects */ - c_can_activate_all_lower_rx_msg_obj(dev, IF_RX); - } -#endif if (priv->type != BOSCH_D_CAN) c_can_object_get(dev, IF_RX, obj, IF_COMM_CLR_NEWDAT); } @@ -799,9 +785,6 @@ static inline u32 c_can_get_pending(struct c_can_priv *priv) { u32 pend = priv->read_reg(priv, C_CAN_NEWDAT1_REG); -#ifdef CONFIG_CAN_C_CAN_STRICT_FRAME_ORDERING - pend &= ~priv->rxmasked; -#endif return pend; } @@ -814,25 +797,6 @@ static inline u32 c_can_get_pending(struct c_can_priv *priv) * has arrived. To work-around this issue, we keep two groups of message * objects whose partitioning is defined by C_CAN_MSG_OBJ_RX_SPLIT. * - * If CONFIG_CAN_C_CAN_STRICT_FRAME_ORDERING = y - * - * To ensure in-order frame reception we use the following - * approach while re-activating a message object to receive further - * frames: - * - if the current message object number is lower than - * C_CAN_MSG_RX_LOW_LAST, do not clear the NEWDAT bit while clearing - * the INTPND bit. - * - if the current message object number is equal to - * C_CAN_MSG_RX_LOW_LAST then clear the NEWDAT bit of all lower - * receive message objects. - * - if the current message object number is greater than - * C_CAN_MSG_RX_LOW_LAST then clear the NEWDAT bit of - * only this message object. - * - * This can cause packet loss! - * - * If CONFIG_CAN_C_CAN_STRICT_FRAME_ORDERING = n - * * We clear the newdat bit right away. * * This can result in packet reordering when the readout is slow. From 0819b2e30ccb93edf04876237b6205eef84ec8d2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 May 2014 20:23:48 +0200 Subject: [PATCH 123/178] perf: Limit perf_event_attr::sample_period to 63 bits Vince reported that using a large sample_period (one with bit 63 set) results in wreckage since while the sample_period is fundamentally unsigned (negative periods don't make sense) the way we implement things very much rely on signed logic. So limit sample_period to 63 bits to avoid tripping over this. Reported-by: Vince Weaver Signed-off-by: Peter Zijlstra Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/n/tip-p25fhunibl4y3qi0zuqmyf4b@git.kernel.org Signed-off-by: Thomas Gleixner --- kernel/events/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 71232844f235..1d1ec6453a08 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7029,6 +7029,9 @@ SYSCALL_DEFINE5(perf_event_open, if (attr.freq) { if (attr.sample_freq > sysctl_perf_event_sample_rate) return -EINVAL; + } else { + if (attr.sample_period & (1ULL << 63)) + return -EINVAL; } /* From 39af6b1678afa5880dda7e375cf3f9d395087f6d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Apr 2014 11:04:08 +0200 Subject: [PATCH 124/178] perf: Prevent false warning in perf_swevent_add The perf cpu offline callback takes down all cpu context events and releases swhash->swevent_hlist. This could race with task context software event being just scheduled on this cpu via perf_swevent_add while cpu hotplug code already cleaned up event's data. The race happens in the gap between the cpu notifier code and the cpu being actually taken down. Note that only cpu ctx events are terminated in the perf cpu hotplug code. It's easily reproduced with: $ perf record -e faults perf bench sched pipe while putting one of the cpus offline: # echo 0 > /sys/devices/system/cpu/cpu1/online Console emits following warning: WARNING: CPU: 1 PID: 2845 at kernel/events/core.c:5672 perf_swevent_add+0x18d/0x1a0() Modules linked in: CPU: 1 PID: 2845 Comm: sched-pipe Tainted: G W 3.14.0+ #256 Hardware name: Intel Corporation Montevina platform/To be filled by O.E.M., BIOS AMVACRB1.86C.0066.B00.0805070703 05/07/2008 0000000000000009 ffff880077233ab8 ffffffff81665a23 0000000000200005 0000000000000000 ffff880077233af8 ffffffff8104732c 0000000000000046 ffff88007467c800 0000000000000002 ffff88007a9cf2a0 0000000000000001 Call Trace: [] dump_stack+0x4f/0x7c [] warn_slowpath_common+0x8c/0xc0 [] warn_slowpath_null+0x1a/0x20 [] perf_swevent_add+0x18d/0x1a0 [] event_sched_in.isra.75+0x9e/0x1f0 [] group_sched_in+0x6a/0x1f0 [] ? sched_clock_local+0x25/0xa0 [] ctx_sched_in+0x1f6/0x450 [] perf_event_sched_in+0x6b/0xa0 [] perf_event_context_sched_in+0x7b/0xc0 [] __perf_event_task_sched_in+0x43e/0x460 [] ? put_lock_stats.isra.18+0xe/0x30 [] finish_task_switch+0xb8/0x100 [] __schedule+0x30e/0xad0 [] ? pipe_read+0x3e2/0x560 [] ? preempt_schedule_irq+0x3e/0x70 [] ? preempt_schedule_irq+0x3e/0x70 [] preempt_schedule_irq+0x44/0x70 [] retint_kernel+0x20/0x30 [] ? lockdep_sys_exit+0x1a/0x90 [] lockdep_sys_exit_thunk+0x35/0x67 [] ? sysret_check+0x5/0x56 Fixing this by tracking the cpu hotplug state and displaying the WARN only if current cpu is initialized properly. Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: stable@vger.kernel.org Reported-by: Fengguang Wu Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1396861448-10097-1-git-send-email-jolsa@redhat.com Signed-off-by: Thomas Gleixner --- kernel/events/core.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 1d1ec6453a08..feb1329ca331 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5419,6 +5419,9 @@ struct swevent_htable { /* Recursion avoidance in each contexts */ int recursion[PERF_NR_CONTEXTS]; + + /* Keeps track of cpu being initialized/exited */ + bool online; }; static DEFINE_PER_CPU(struct swevent_htable, swevent_htable); @@ -5665,8 +5668,14 @@ static int perf_swevent_add(struct perf_event *event, int flags) hwc->state = !(flags & PERF_EF_START); head = find_swevent_head(swhash, event); - if (WARN_ON_ONCE(!head)) + if (!head) { + /* + * We can race with cpu hotplug code. Do not + * WARN if the cpu just got unplugged. + */ + WARN_ON_ONCE(swhash->online); return -EINVAL; + } hlist_add_head_rcu(&event->hlist_entry, head); @@ -7845,6 +7854,7 @@ static void perf_event_init_cpu(int cpu) struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); mutex_lock(&swhash->hlist_mutex); + swhash->online = true; if (swhash->hlist_refcount > 0) { struct swevent_hlist *hlist; @@ -7902,6 +7912,7 @@ static void perf_event_exit_cpu(int cpu) perf_event_exit_cpu_context(cpu); mutex_lock(&swhash->hlist_mutex); + swhash->online = false; swevent_hlist_release(swhash); mutex_unlock(&swhash->hlist_mutex); } From b69cf53640da2b86439596118cfa95233154ee76 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 14 Mar 2014 10:50:33 +0100 Subject: [PATCH 125/178] perf: Fix a race between ring_buffer_detach() and ring_buffer_attach() Alexander noticed that we use RCU iteration on rb->event_list but do not use list_{add,del}_rcu() to add,remove entries to that list, nor do we observe proper grace periods when re-using the entries. Merge ring_buffer_detach() into ring_buffer_attach() such that attaching to the NULL buffer is detaching. Furthermore, ensure that between any 'detach' and 'attach' of the same event we observe the required grace period, but only when strictly required. In effect this means that only ioctl(.request = PERF_EVENT_IOC_SET_OUTPUT) will wait for a grace period, while the normal initial attach and final detach will not be delayed. This patch should, I think, do the right thing under all circumstances, the 'normal' cases all should never see the extra grace period, but the two cases: 1) PERF_EVENT_IOC_SET_OUTPUT on an event which already has a ring_buffer set, will now observe the required grace period between removing itself from the old and attaching itself to the new buffer. This case is 'simple' in that both buffers are present in perf_event_set_output() one could think an unconditional synchronize_rcu() would be sufficient; however... 2) an event that has a buffer attached, the buffer is destroyed (munmap) and then the event is attached to a new/different buffer using PERF_EVENT_IOC_SET_OUTPUT. This case is more complex because the buffer destruction does: ring_buffer_attach(.rb = NULL) followed by the ioctl() doing: ring_buffer_attach(.rb = foo); and we still need to observe the grace period between these two calls due to us reusing the event->rb_entry list_head. In order to make 2 happen we use Paul's latest cond_synchronize_rcu() call. Cc: Paul Mackerras Cc: Stephane Eranian Cc: Andi Kleen Cc: "Paul E. McKenney" Cc: Ingo Molnar Cc: Frederic Weisbecker Cc: Mike Galbraith Reported-by: Alexander Shishkin Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140507123526.GD13658@twins.programming.kicks-ass.net Signed-off-by: Thomas Gleixner --- include/linux/perf_event.h | 2 + kernel/events/core.c | 109 +++++++++++++++++-------------------- 2 files changed, 51 insertions(+), 60 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3356abcfff18..3ef6ea12806a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -402,6 +402,8 @@ struct perf_event { struct ring_buffer *rb; struct list_head rb_entry; + unsigned long rcu_batches; + int rcu_pending; /* poll related */ wait_queue_head_t waitq; diff --git a/kernel/events/core.c b/kernel/events/core.c index feb1329ca331..440eefc67397 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3192,7 +3192,8 @@ static void free_event_rcu(struct rcu_head *head) } static void ring_buffer_put(struct ring_buffer *rb); -static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb); +static void ring_buffer_attach(struct perf_event *event, + struct ring_buffer *rb); static void unaccount_event_cpu(struct perf_event *event, int cpu) { @@ -3252,8 +3253,6 @@ static void free_event(struct perf_event *event) unaccount_event(event); if (event->rb) { - struct ring_buffer *rb; - /* * Can happen when we close an event with re-directed output. * @@ -3261,12 +3260,7 @@ static void free_event(struct perf_event *event) * over us; possibly making our ring_buffer_put() the last. */ mutex_lock(&event->mmap_mutex); - rb = event->rb; - if (rb) { - rcu_assign_pointer(event->rb, NULL); - ring_buffer_detach(event, rb); - ring_buffer_put(rb); /* could be last */ - } + ring_buffer_attach(event, NULL); mutex_unlock(&event->mmap_mutex); } @@ -3850,28 +3844,47 @@ unlock: static void ring_buffer_attach(struct perf_event *event, struct ring_buffer *rb) { + struct ring_buffer *old_rb = NULL; unsigned long flags; - if (!list_empty(&event->rb_entry)) - return; + if (event->rb) { + /* + * Should be impossible, we set this when removing + * event->rb_entry and wait/clear when adding event->rb_entry. + */ + WARN_ON_ONCE(event->rcu_pending); - spin_lock_irqsave(&rb->event_lock, flags); - if (list_empty(&event->rb_entry)) - list_add(&event->rb_entry, &rb->event_list); - spin_unlock_irqrestore(&rb->event_lock, flags); -} + old_rb = event->rb; + event->rcu_batches = get_state_synchronize_rcu(); + event->rcu_pending = 1; -static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb) -{ - unsigned long flags; + spin_lock_irqsave(&old_rb->event_lock, flags); + list_del_rcu(&event->rb_entry); + spin_unlock_irqrestore(&old_rb->event_lock, flags); + } - if (list_empty(&event->rb_entry)) - return; + if (event->rcu_pending && rb) { + cond_synchronize_rcu(event->rcu_batches); + event->rcu_pending = 0; + } - spin_lock_irqsave(&rb->event_lock, flags); - list_del_init(&event->rb_entry); - wake_up_all(&event->waitq); - spin_unlock_irqrestore(&rb->event_lock, flags); + if (rb) { + spin_lock_irqsave(&rb->event_lock, flags); + list_add_rcu(&event->rb_entry, &rb->event_list); + spin_unlock_irqrestore(&rb->event_lock, flags); + } + + rcu_assign_pointer(event->rb, rb); + + if (old_rb) { + ring_buffer_put(old_rb); + /* + * Since we detached before setting the new rb, so that we + * could attach the new rb, we could have missed a wakeup. + * Provide it now. + */ + wake_up_all(&event->waitq); + } } static void ring_buffer_wakeup(struct perf_event *event) @@ -3940,7 +3953,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) { struct perf_event *event = vma->vm_file->private_data; - struct ring_buffer *rb = event->rb; + struct ring_buffer *rb = ring_buffer_get(event); struct user_struct *mmap_user = rb->mmap_user; int mmap_locked = rb->mmap_locked; unsigned long size = perf_data_size(rb); @@ -3948,18 +3961,14 @@ static void perf_mmap_close(struct vm_area_struct *vma) atomic_dec(&rb->mmap_count); if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) - return; + goto out_put; - /* Detach current event from the buffer. */ - rcu_assign_pointer(event->rb, NULL); - ring_buffer_detach(event, rb); + ring_buffer_attach(event, NULL); mutex_unlock(&event->mmap_mutex); /* If there's still other mmap()s of this buffer, we're done. */ - if (atomic_read(&rb->mmap_count)) { - ring_buffer_put(rb); /* can't be last */ - return; - } + if (atomic_read(&rb->mmap_count)) + goto out_put; /* * No other mmap()s, detach from all other events that might redirect @@ -3989,11 +3998,9 @@ again: * still restart the iteration to make sure we're not now * iterating the wrong list. */ - if (event->rb == rb) { - rcu_assign_pointer(event->rb, NULL); - ring_buffer_detach(event, rb); - ring_buffer_put(rb); /* can't be last, we still have one */ - } + if (event->rb == rb) + ring_buffer_attach(event, NULL); + mutex_unlock(&event->mmap_mutex); put_event(event); @@ -4018,6 +4025,7 @@ again: vma->vm_mm->pinned_vm -= mmap_locked; free_uid(mmap_user); +out_put: ring_buffer_put(rb); /* could be last */ } @@ -4135,7 +4143,6 @@ again: vma->vm_mm->pinned_vm += extra; ring_buffer_attach(event, rb); - rcu_assign_pointer(event->rb, rb); perf_event_init_userpage(event); perf_event_update_userpage(event); @@ -6934,7 +6941,7 @@ err_size: static int perf_event_set_output(struct perf_event *event, struct perf_event *output_event) { - struct ring_buffer *rb = NULL, *old_rb = NULL; + struct ring_buffer *rb = NULL; int ret = -EINVAL; if (!output_event) @@ -6962,8 +6969,6 @@ set: if (atomic_read(&event->mmap_count)) goto unlock; - old_rb = event->rb; - if (output_event) { /* get the rb we want to redirect to */ rb = ring_buffer_get(output_event); @@ -6971,23 +6976,7 @@ set: goto unlock; } - if (old_rb) - ring_buffer_detach(event, old_rb); - - if (rb) - ring_buffer_attach(event, rb); - - rcu_assign_pointer(event->rb, rb); - - if (old_rb) { - ring_buffer_put(old_rb); - /* - * Since we detached before setting the new rb, so that we - * could attach the new rb, we could have missed a wakeup. - * Provide it now. - */ - wake_up_all(&event->waitq); - } + ring_buffer_attach(event, rb); ret = 0; unlock: From 6679b2ccc914e84384b1acf0de4423e3738cce9f Mon Sep 17 00:00:00 2001 From: Martin Peres Date: Mon, 12 May 2014 15:05:01 +0200 Subject: [PATCH 126/178] drm/nvd9/therm: handle another kind of PWM fan Signed-off-by: Martin Peres Tested-by: SaveTheRobots Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/core/subdev/therm/nvd0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/nvd0.c b/drivers/gpu/drm/nouveau/core/subdev/therm/nvd0.c index 43fec17ea540..bbf117be572f 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/therm/nvd0.c +++ b/drivers/gpu/drm/nouveau/core/subdev/therm/nvd0.c @@ -40,6 +40,7 @@ pwm_info(struct nouveau_therm *therm, int line) case 0x00: return 2; case 0x19: return 1; case 0x1c: return 0; + case 0x1e: return 2; default: break; } From 0f1d360b2ee3a2a0f510d3f1bcd3f5ebe5d41265 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 20 May 2014 16:13:54 +1000 Subject: [PATCH 127/178] drm/gf119-/disp: fix nasty bug which can clobber SOR0's clock setup Fixes a LVDS bleed issue on Lenovo W530 that can occur under a number of circumstances. Cc: # v3.9+ Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/core/engine/disp/nvd0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nvd0.c b/drivers/gpu/drm/nouveau/core/engine/disp/nvd0.c index 7762665ad8fd..876de9ac3793 100644 --- a/drivers/gpu/drm/nouveau/core/engine/disp/nvd0.c +++ b/drivers/gpu/drm/nouveau/core/engine/disp/nvd0.c @@ -1009,7 +1009,7 @@ exec_clkcmp(struct nv50_disp_priv *priv, int head, int id, } if (outp == 8) - return false; + return conf; data = exec_lookup(priv, head, outp, ctrl, dcb, &ver, &hdr, &cnt, &len, &info1); if (data == 0x0000) From 544092596e8ac269f70e70961b5e9381909c9b1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 5 May 2014 18:40:12 +0200 Subject: [PATCH 128/178] drm/radeon: also try GART for CPU accessed buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Placing them exclusively into VRAM might not work all the time. Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=78297 Signed-off-by: Christian König Reviewed-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_object.c | 38 ++++++++++++++++---------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 19bec0dbfa38..72705fb40d55 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -699,22 +699,30 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo) rbo = container_of(bo, struct radeon_bo, tbo); radeon_bo_check_tiling(rbo, 0, 0); rdev = rbo->rdev; - if (bo->mem.mem_type == TTM_PL_VRAM) { - size = bo->mem.num_pages << PAGE_SHIFT; - offset = bo->mem.start << PAGE_SHIFT; - if ((offset + size) > rdev->mc.visible_vram_size) { - /* hurrah the memory is not visible ! */ - radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM); - rbo->placement.lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT; - r = ttm_bo_validate(bo, &rbo->placement, false, false); - if (unlikely(r != 0)) - return r; - offset = bo->mem.start << PAGE_SHIFT; - /* this should not happen */ - if ((offset + size) > rdev->mc.visible_vram_size) - return -EINVAL; - } + if (bo->mem.mem_type != TTM_PL_VRAM) + return 0; + + size = bo->mem.num_pages << PAGE_SHIFT; + offset = bo->mem.start << PAGE_SHIFT; + if ((offset + size) <= rdev->mc.visible_vram_size) + return 0; + + /* hurrah the memory is not visible ! */ + radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM); + rbo->placement.lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT; + r = ttm_bo_validate(bo, &rbo->placement, false, false); + if (unlikely(r == -ENOMEM)) { + radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT); + return ttm_bo_validate(bo, &rbo->placement, false, false); + } else if (unlikely(r != 0)) { + return r; } + + offset = bo->mem.start << PAGE_SHIFT; + /* this should never happen */ + if ((offset + size) > rdev->mc.visible_vram_size) + return -EINVAL; + return 0; } From 2fc5703abda201f138faf63bdca743d04dbf4b1a Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Mon, 5 May 2014 15:42:18 -0400 Subject: [PATCH 129/178] drm/radeon: check VCE relocation buffer range v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2 (chk): fix image size storage v3 (chk): fix UV size calculation Signed-off-by: Leo Liu Signed-off-by: Christian König --- drivers/gpu/drm/radeon/radeon.h | 3 +- drivers/gpu/drm/radeon/radeon_vce.c | 130 +++++++++++++++++++++------- 2 files changed, 102 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 68528619834a..91be3d6eb845 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1642,6 +1642,7 @@ struct radeon_vce { unsigned fb_version; atomic_t handles[RADEON_MAX_VCE_HANDLES]; struct drm_file *filp[RADEON_MAX_VCE_HANDLES]; + unsigned img_size[RADEON_MAX_VCE_HANDLES]; struct delayed_work idle_work; }; @@ -1655,7 +1656,7 @@ int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring, uint32_t handle, struct radeon_fence **fence); void radeon_vce_free_handles(struct radeon_device *rdev, struct drm_file *filp); void radeon_vce_note_usage(struct radeon_device *rdev); -int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi); +int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi, unsigned size); int radeon_vce_cs_parse(struct radeon_cs_parser *p); bool radeon_vce_semaphore_emit(struct radeon_device *rdev, struct radeon_ring *ring, diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index f73324c81491..3971d968af6c 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -443,13 +443,16 @@ int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring, * @p: parser context * @lo: address of lower dword * @hi: address of higher dword + * @size: size of checker for relocation buffer * * Patch relocation inside command stream with real buffer address */ -int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi) +int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi, + unsigned size) { struct radeon_cs_chunk *relocs_chunk; - uint64_t offset; + struct radeon_cs_reloc *reloc; + uint64_t start, end, offset; unsigned idx; relocs_chunk = &p->chunks[p->chunk_relocs_idx]; @@ -462,14 +465,59 @@ int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi) return -EINVAL; } - offset += p->relocs_ptr[(idx / 4)]->gpu_offset; + reloc = p->relocs_ptr[(idx / 4)]; + start = reloc->gpu_offset; + end = start + radeon_bo_size(reloc->robj); + start += offset; - p->ib.ptr[lo] = offset & 0xFFFFFFFF; - p->ib.ptr[hi] = offset >> 32; + p->ib.ptr[lo] = start & 0xFFFFFFFF; + p->ib.ptr[hi] = start >> 32; + + if (end <= start) { + DRM_ERROR("invalid reloc offset %llX!\n", offset); + return -EINVAL; + } + if ((end - start) < size) { + DRM_ERROR("buffer to small (%d / %d)!\n", + (unsigned)(end - start), size); + return -EINVAL; + } return 0; } +/** + * radeon_vce_validate_handle - validate stream handle + * + * @p: parser context + * @handle: handle to validate + * + * Validates the handle and return the found session index or -EINVAL + * we we don't have another free session index. + */ +int radeon_vce_validate_handle(struct radeon_cs_parser *p, uint32_t handle) +{ + unsigned i; + + /* validate the handle */ + for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { + if (atomic_read(&p->rdev->vce.handles[i]) == handle) + return i; + } + + /* handle not found try to alloc a new one */ + for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { + if (!atomic_cmpxchg(&p->rdev->vce.handles[i], 0, handle)) { + p->rdev->vce.filp[i] = p->filp; + p->rdev->vce.img_size[i] = 0; + return i; + } + } + + DRM_ERROR("No more free VCE handles!\n"); + return -EINVAL; +} + /** * radeon_vce_cs_parse - parse and validate the command stream * @@ -478,8 +526,10 @@ int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi) */ int radeon_vce_cs_parse(struct radeon_cs_parser *p) { - uint32_t handle = 0; - bool destroy = false; + int session_idx = -1; + bool destroyed = false; + uint32_t tmp, handle = 0; + uint32_t *size = &tmp; int i, r; while (p->idx < p->chunks[p->chunk_ib_idx].length_dw) { @@ -491,13 +541,29 @@ int radeon_vce_cs_parse(struct radeon_cs_parser *p) return -EINVAL; } + if (destroyed) { + DRM_ERROR("No other command allowed after destroy!\n"); + return -EINVAL; + } + switch (cmd) { case 0x00000001: // session handle = radeon_get_ib_value(p, p->idx + 2); + session_idx = radeon_vce_validate_handle(p, handle); + if (session_idx < 0) + return session_idx; + size = &p->rdev->vce.img_size[session_idx]; break; case 0x00000002: // task info + break; + case 0x01000001: // create + *size = radeon_get_ib_value(p, p->idx + 8) * + radeon_get_ib_value(p, p->idx + 10) * + 8 * 3 / 2; + break; + case 0x04000001: // config extension case 0x04000002: // pic control case 0x04000005: // rate control @@ -506,23 +572,39 @@ int radeon_vce_cs_parse(struct radeon_cs_parser *p) break; case 0x03000001: // encode - r = radeon_vce_cs_reloc(p, p->idx + 10, p->idx + 9); + r = radeon_vce_cs_reloc(p, p->idx + 10, p->idx + 9, + *size); if (r) return r; - r = radeon_vce_cs_reloc(p, p->idx + 12, p->idx + 11); + r = radeon_vce_cs_reloc(p, p->idx + 12, p->idx + 11, + *size / 3); if (r) return r; break; case 0x02000001: // destroy - destroy = true; + destroyed = true; break; case 0x05000001: // context buffer + r = radeon_vce_cs_reloc(p, p->idx + 3, p->idx + 2, + *size * 2); + if (r) + return r; + break; + case 0x05000004: // video bitstream buffer + tmp = radeon_get_ib_value(p, p->idx + 4); + r = radeon_vce_cs_reloc(p, p->idx + 3, p->idx + 2, + tmp); + if (r) + return r; + break; + case 0x05000005: // feedback buffer - r = radeon_vce_cs_reloc(p, p->idx + 3, p->idx + 2); + r = radeon_vce_cs_reloc(p, p->idx + 3, p->idx + 2, + 4096); if (r) return r; break; @@ -532,33 +614,21 @@ int radeon_vce_cs_parse(struct radeon_cs_parser *p) return -EINVAL; } + if (session_idx == -1) { + DRM_ERROR("no session command at start of IB\n"); + return -EINVAL; + } + p->idx += len / 4; } - if (destroy) { + if (destroyed) { /* IB contains a destroy msg, free the handle */ for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) atomic_cmpxchg(&p->rdev->vce.handles[i], handle, 0); - - return 0; - } - - /* create or encode, validate the handle */ - for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { - if (atomic_read(&p->rdev->vce.handles[i]) == handle) - return 0; } - /* handle not found try to alloc a new one */ - for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { - if (!atomic_cmpxchg(&p->rdev->vce.handles[i], 0, handle)) { - p->rdev->vce.filp[i] = p->filp; - return 0; - } - } - - DRM_ERROR("No more free VCE handles!\n"); - return -EINVAL; + return 0; } /** From 89d2618dbd114e679b860c3de1df3fe43ee40f6e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 8 May 2014 18:26:23 -0400 Subject: [PATCH 130/178] drm/radeon: fix DCE83 check for mullins MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mullins is DCE83 just like Kabini. Set the proper number of endpoints on mullins. Signed-off-by: Alex Deucher Signed-off-by: Christian König --- drivers/gpu/drm/radeon/radeon.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 91be3d6eb845..8149e7cf4303 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2641,7 +2641,8 @@ void r100_pll_errata_after_index(struct radeon_device *rdev); #define ASIC_IS_DCE8(rdev) ((rdev->family >= CHIP_BONAIRE)) #define ASIC_IS_DCE81(rdev) ((rdev->family == CHIP_KAVERI)) #define ASIC_IS_DCE82(rdev) ((rdev->family == CHIP_BONAIRE)) -#define ASIC_IS_DCE83(rdev) ((rdev->family == CHIP_KABINI)) +#define ASIC_IS_DCE83(rdev) ((rdev->family == CHIP_KABINI) || \ + (rdev->family == CHIP_MULLINS)) #define ASIC_IS_LOMBOK(rdev) ((rdev->ddev->pdev->device == 0x6849) || \ (rdev->ddev->pdev->device == 0x6850) || \ From d8ade3526b2aa0505132c404c05a38b73ea15490 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 8 May 2014 20:04:03 -0400 Subject: [PATCH 131/178] drm/radeon: handle non-VGA class pci devices with ATRM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Newer PX systems have non-VGA pci class dGPUs. Update the ATRM fetch method to handle those cases. bug: https://bugzilla.kernel.org/show_bug.cgi?id=75401 Signed-off-by: Alex Deucher Signed-off-by: Christian König Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_bios.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c index b3633d9a5317..9ab30976287d 100644 --- a/drivers/gpu/drm/radeon/radeon_bios.c +++ b/drivers/gpu/drm/radeon/radeon_bios.c @@ -196,6 +196,20 @@ static bool radeon_atrm_get_bios(struct radeon_device *rdev) } } + if (!found) { + while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) { + dhandle = ACPI_HANDLE(&pdev->dev); + if (!dhandle) + continue; + + status = acpi_get_handle(dhandle, "ATRM", &atrm_handle); + if (!ACPI_FAILURE(status)) { + found = true; + break; + } + } + } + if (!found) return false; From 4906f6891ef987637b766dfaef1c6fc95804253a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 12 May 2014 14:46:11 +0200 Subject: [PATCH 132/178] drm/radeon: fix page directory update size estimation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take padding into account as well. Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=75651 Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 2aae6ce49d32..d9ab99f47612 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -595,7 +595,7 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev, ndw = 64; /* assume the worst case */ - ndw += vm->max_pde_used * 12; + ndw += vm->max_pde_used * 16; /* update too big for an IB */ if (ndw > 0xfffff) From 4b09556660bfe1b43d72ca858524c6baf2c6cb1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Sat, 10 May 2014 18:17:09 +0200 Subject: [PATCH 133/178] drm/radeon: fix buffer placement under memory pressure v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some buffers (UVD/VM page tables) must be placed in VRAM, but the byte restriction for moving buffers didn't took this into account. v2: keep closer to the original code Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Marek Olšák --- drivers/gpu/drm/radeon/radeon_object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 72705fb40d55..4faa4d6f9bb4 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -458,7 +458,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev, * into account. We don't want to disallow buffer moves * completely. */ - if (current_domain != RADEON_GEM_DOMAIN_CPU && + if ((lobj->alt_domain & current_domain) != 0 && (domain & current_domain) == 0 && /* will be moved */ bytes_moved > bytes_moved_threshold) { /* don't move it */ From 4955bb073f1be6dd884b5d10041ba4bade6495bf Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 5 May 2014 16:40:42 -0400 Subject: [PATCH 134/178] drm/radeon: fix register typo on si MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Probably a copy paste typo. Signed-off-by: Alex Deucher Signed-off-by: Christian König Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/sid.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 683532f84931..7321283602ce 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -107,8 +107,8 @@ #define SPLL_CHG_STATUS (1 << 1) #define SPLL_CNTL_MODE 0x618 #define SPLL_SW_DIR_CONTROL (1 << 0) -# define SPLL_REFCLK_SEL(x) ((x) << 8) -# define SPLL_REFCLK_SEL_MASK 0xFF00 +# define SPLL_REFCLK_SEL(x) ((x) << 26) +# define SPLL_REFCLK_SEL_MASK (3 << 26) #define CG_SPLL_SPREAD_SPECTRUM 0x620 #define SSEN (1 << 0) From 74ad54f249de39bc040cce7237b1b854a9c6f0ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 13 May 2014 12:50:54 +0200 Subject: [PATCH 135/178] drm/radeon: fix typo in finding PLL params MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise the limit is raised to high. Signed-off-by: Christian König Tested-by: Ken Moffat --- drivers/gpu/drm/radeon/radeon_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 408b6ac53f0b..f00dbbf4d806 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -999,7 +999,7 @@ void radeon_compute_pll_avivo(struct radeon_pll *pll, /* avoid high jitter with small fractional dividers */ if (pll->flags & RADEON_PLL_USE_FRAC_FB_DIV && (fb_div % 10)) { - fb_div_min = max(fb_div_min, (9 - (fb_div % 10)) * 20 + 60); + fb_div_min = max(fb_div_min, (9 - (fb_div % 10)) * 20 + 50); if (fb_div < fb_div_min) { unsigned tmp = DIV_ROUND_UP(fb_div_min, fb_div); fb_div *= tmp; From 24f47acc78b0ab5e2201f859fe1f693ae90c7c83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Wed, 7 May 2014 16:35:24 -0400 Subject: [PATCH 136/178] drm/radeon: avoid segfault on device open when accel is not working. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When accel is not working on device with virtual address space radeon segfault because the ib buffer is NULL and trying to map it inside the virtual address space trigger segfault. This patch only map the ib buffer if accel is working. Cc: Signed-off-by: Jérôme Glisse Reviewed-by: Alex Deucher Signed-off-by: Christian König --- drivers/gpu/drm/radeon/radeon_kms.c | 59 +++++++++++++++-------------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 0cc47f12d995..eaaedba04675 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -577,28 +577,29 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) return r; } - r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); - if (r) { - radeon_vm_fini(rdev, &fpriv->vm); - kfree(fpriv); - return r; + if (rdev->accel_working) { + r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); + if (r) { + radeon_vm_fini(rdev, &fpriv->vm); + kfree(fpriv); + return r; + } + + /* map the ib pool buffer read only into + * virtual address space */ + bo_va = radeon_vm_bo_add(rdev, &fpriv->vm, + rdev->ring_tmp_bo.bo); + r = radeon_vm_bo_set_addr(rdev, bo_va, RADEON_VA_IB_OFFSET, + RADEON_VM_PAGE_READABLE | + RADEON_VM_PAGE_SNOOPED); + + radeon_bo_unreserve(rdev->ring_tmp_bo.bo); + if (r) { + radeon_vm_fini(rdev, &fpriv->vm); + kfree(fpriv); + return r; + } } - - /* map the ib pool buffer read only into - * virtual address space */ - bo_va = radeon_vm_bo_add(rdev, &fpriv->vm, - rdev->ring_tmp_bo.bo); - r = radeon_vm_bo_set_addr(rdev, bo_va, RADEON_VA_IB_OFFSET, - RADEON_VM_PAGE_READABLE | - RADEON_VM_PAGE_SNOOPED); - - radeon_bo_unreserve(rdev->ring_tmp_bo.bo); - if (r) { - radeon_vm_fini(rdev, &fpriv->vm); - kfree(fpriv); - return r; - } - file_priv->driver_priv = fpriv; } @@ -626,13 +627,15 @@ void radeon_driver_postclose_kms(struct drm_device *dev, struct radeon_bo_va *bo_va; int r; - r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); - if (!r) { - bo_va = radeon_vm_bo_find(&fpriv->vm, - rdev->ring_tmp_bo.bo); - if (bo_va) - radeon_vm_bo_rmv(rdev, bo_va); - radeon_bo_unreserve(rdev->ring_tmp_bo.bo); + if (rdev->accel_working) { + r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); + if (!r) { + bo_va = radeon_vm_bo_find(&fpriv->vm, + rdev->ring_tmp_bo.bo); + if (bo_va) + radeon_vm_bo_rmv(rdev, bo_va); + radeon_bo_unreserve(rdev->ring_tmp_bo.bo); + } } radeon_vm_fini(rdev, &fpriv->vm); From 4f2f203976964e267dc477de6648bdb3acd2b74b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 19 May 2014 19:21:29 -0400 Subject: [PATCH 137/178] drm/radeon/pm: don't allow debugfs/sysfs access when PX card is off (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the PX card is off don't try and access it. Avoid hw access to the card while it's off (e.g., reading back invalid temperature). v2: be less strict bug: https://bugzilla.kernel.org/show_bug.cgi?id=76321 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Christian König --- drivers/gpu/drm/radeon/radeon_pm.c | 42 +++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index f30b8426eee2..53d6e1bb48dc 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -361,6 +361,11 @@ static ssize_t radeon_set_pm_profile(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct radeon_device *rdev = ddev->dev_private; + /* Can't set profile when the card is off */ + if ((rdev->flags & RADEON_IS_PX) && + (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + return -EINVAL; + mutex_lock(&rdev->pm.mutex); if (rdev->pm.pm_method == PM_METHOD_PROFILE) { if (strncmp("default", buf, strlen("default")) == 0) @@ -409,6 +414,13 @@ static ssize_t radeon_set_pm_method(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct radeon_device *rdev = ddev->dev_private; + /* Can't set method when the card is off */ + if ((rdev->flags & RADEON_IS_PX) && + (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) { + count = -EINVAL; + goto fail; + } + /* we don't support the legacy modes with dpm */ if (rdev->pm.pm_method == PM_METHOD_DPM) { count = -EINVAL; @@ -446,6 +458,10 @@ static ssize_t radeon_get_dpm_state(struct device *dev, struct radeon_device *rdev = ddev->dev_private; enum radeon_pm_state_type pm = rdev->pm.dpm.user_state; + if ((rdev->flags & RADEON_IS_PX) && + (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + return snprintf(buf, PAGE_SIZE, "off\n"); + return snprintf(buf, PAGE_SIZE, "%s\n", (pm == POWER_STATE_TYPE_BATTERY) ? "battery" : (pm == POWER_STATE_TYPE_BALANCED) ? "balanced" : "performance"); @@ -459,6 +475,11 @@ static ssize_t radeon_set_dpm_state(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct radeon_device *rdev = ddev->dev_private; + /* Can't set dpm state when the card is off */ + if ((rdev->flags & RADEON_IS_PX) && + (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + return -EINVAL; + mutex_lock(&rdev->pm.mutex); if (strncmp("battery", buf, strlen("battery")) == 0) rdev->pm.dpm.user_state = POWER_STATE_TYPE_BATTERY; @@ -485,6 +506,10 @@ static ssize_t radeon_get_dpm_forced_performance_level(struct device *dev, struct radeon_device *rdev = ddev->dev_private; enum radeon_dpm_forced_level level = rdev->pm.dpm.forced_level; + if ((rdev->flags & RADEON_IS_PX) && + (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + return snprintf(buf, PAGE_SIZE, "off\n"); + return snprintf(buf, PAGE_SIZE, "%s\n", (level == RADEON_DPM_FORCED_LEVEL_AUTO) ? "auto" : (level == RADEON_DPM_FORCED_LEVEL_LOW) ? "low" : "high"); @@ -500,6 +525,11 @@ static ssize_t radeon_set_dpm_forced_performance_level(struct device *dev, enum radeon_dpm_forced_level level; int ret = 0; + /* Can't force performance level when the card is off */ + if ((rdev->flags & RADEON_IS_PX) && + (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + return -EINVAL; + mutex_lock(&rdev->pm.mutex); if (strncmp("low", buf, strlen("low")) == 0) { level = RADEON_DPM_FORCED_LEVEL_LOW; @@ -538,8 +568,14 @@ static ssize_t radeon_hwmon_show_temp(struct device *dev, char *buf) { struct radeon_device *rdev = dev_get_drvdata(dev); + struct drm_device *ddev = rdev->ddev; int temp; + /* Can't get temperature when the card is off */ + if ((rdev->flags & RADEON_IS_PX) && + (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + return -EINVAL; + if (rdev->asic->pm.get_temperature) temp = radeon_get_temperature(rdev); else @@ -1614,8 +1650,12 @@ static int radeon_debugfs_pm_info(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *) m->private; struct drm_device *dev = node->minor->dev; struct radeon_device *rdev = dev->dev_private; + struct drm_device *ddev = rdev->ddev; - if (rdev->pm.dpm_enabled) { + if ((rdev->flags & RADEON_IS_PX) && + (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) { + seq_printf(m, "PX asic powered off\n"); + } else if (rdev->pm.dpm_enabled) { mutex_lock(&rdev->pm.mutex); if (rdev->asic->dpm.debugfs_print_current_performance_level) radeon_dpm_debugfs_print_current_performance_level(rdev, m); From e1618d461ca18d40f9c3ef70598abb72e75d27ae Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 20 May 2014 10:59:26 -0400 Subject: [PATCH 138/178] vlan: Fix build error wth vlan_get_encap_level() The new function vlan_get_encap_level() uses vlan_dev_priv() which is only conditionally avaialble when VLAN support is enabled. Make vlan_get_encap_level() conditionally available as well. Fixes: 44a4085538c8 ("bonding: Fix stacked device detection in arp monitoring") Reported-by: Stephen Rothwell CC: Stephen Rothwell Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index c901b13b6f03..b2acc4a1b13c 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -198,6 +198,12 @@ extern void vlan_vids_del_by_dev(struct net_device *dev, const struct net_device *by_dev); extern bool vlan_uses_dev(const struct net_device *dev); + +static inline int vlan_get_encap_level(struct net_device *dev) +{ + BUG_ON(!is_vlan_dev(dev)); + return vlan_dev_priv(dev)->nest_level; +} #else static inline struct net_device * __vlan_find_dev_deep(struct net_device *real_dev, @@ -264,6 +270,11 @@ static inline bool vlan_uses_dev(const struct net_device *dev) { return false; } +static inline int vlan_get_encap_level(struct net_device *dev) +{ + BUG(); + return 0; +} #endif static inline bool vlan_hw_offload_capable(netdev_features_t features, @@ -485,9 +496,4 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, skb->protocol = htons(ETH_P_802_2); } -static inline int vlan_get_encap_level(struct net_device *dev) -{ - BUG_ON(!is_vlan_dev(dev)); - return vlan_dev_priv(dev)->nest_level; -} #endif /* !(_LINUX_IF_VLAN_H_) */ From 78ff4be45a4c51d8fb21ad92e4fabb467c6c3eeb Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 19 May 2014 11:36:56 +0200 Subject: [PATCH 139/178] ip_tunnel: Initialize the fallback device properly We need to initialize the fallback device to have a correct mtu set on this device. Otherwise the mtu is set to null and the device is unusable. Fixes: fd58156e456d ("IPIP: Use ip-tunneling code.") Cc: Pravin B Shelar Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 49721d78df75..2acc2337d38b 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -883,6 +883,7 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, */ if (!IS_ERR(itn->fb_tunnel_dev)) { itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; + itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev); ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev)); } rtnl_unlock(); From 0b5a958cf4df3a5cd578b861471e62138f55c85e Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Tue, 20 May 2014 11:38:56 +0200 Subject: [PATCH 140/178] can: peak_pci: prevent use after free at netdev removal As remarked by Christopher R. Baker in his post at http://marc.info/?l=linux-can&m=139707295706465&w=2 there's a possibility for an use after free condition at device removal. This simplified patch introduces an additional variable to prevent the issue. Thanks for catching this. Cc: linux-stable Reported-by: Christopher R. Baker Signed-off-by: Stephane Grosjean Signed-off-by: Marc Kleine-Budde --- drivers/net/can/sja1000/peak_pci.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/sja1000/peak_pci.c b/drivers/net/can/sja1000/peak_pci.c index c540e3d12e3d..564933ae218c 100644 --- a/drivers/net/can/sja1000/peak_pci.c +++ b/drivers/net/can/sja1000/peak_pci.c @@ -551,7 +551,7 @@ static int peak_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct sja1000_priv *priv; struct peak_pci_chan *chan; - struct net_device *dev; + struct net_device *dev, *prev_dev; void __iomem *cfg_base, *reg_base; u16 sub_sys_id, icr; int i, err, channels; @@ -688,11 +688,13 @@ failure_remove_channels: writew(0x0, cfg_base + PITA_ICR + 2); chan = NULL; - for (dev = pci_get_drvdata(pdev); dev; dev = chan->prev_dev) { - unregister_sja1000dev(dev); - free_sja1000dev(dev); + for (dev = pci_get_drvdata(pdev); dev; dev = prev_dev) { priv = netdev_priv(dev); chan = priv->priv; + prev_dev = chan->prev_dev; + + unregister_sja1000dev(dev); + free_sja1000dev(dev); } /* free any PCIeC resources too */ @@ -726,10 +728,12 @@ static void peak_pci_remove(struct pci_dev *pdev) /* Loop over all registered devices */ while (1) { + struct net_device *prev_dev = chan->prev_dev; + dev_info(&pdev->dev, "removing device %s\n", dev->name); unregister_sja1000dev(dev); free_sja1000dev(dev); - dev = chan->prev_dev; + dev = prev_dev; if (!dev) { /* do that only for first channel */ From 6c67c7c38cf32c2a9cbccb6b21aadf61a85fbfb4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 21 May 2014 14:48:05 +0100 Subject: [PATCH 141/178] AFS: Fix cache manager service handlers Fix the cache manager RPC service handlers. The afs_send_empty_reply() and afs_send_simple_reply() functions: (a) Kill the call and free up the buffers associated with it if they fail. (b) Return with call intact if it they succeed. However, none of the callers actually check the result or clean up if successful - and may use the now non-existent data if it fails. This was detected by Dan Carpenter using a static checker: The patch 08e0e7c82eea: "[AF_RXRPC]: Make the in-kernel AFS filesystem use AF_RXRPC." from Apr 26, 2007, leads to the following static checker warning: "fs/afs/cmservice.c:155 SRXAFSCB_CallBack() warn: 'call' was already freed." Reported-by: Dan Carpenter Signed-off-by: David Howells --- fs/afs/cmservice.c | 19 +++++++++++++++++++ fs/afs/rxrpc.c | 43 +++++++++++++++++++++---------------------- 2 files changed, 40 insertions(+), 22 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 1c8c6cc6de30..4b0eff6da674 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -130,6 +130,15 @@ static void afs_cm_destructor(struct afs_call *call) { _enter(""); + /* Break the callbacks here so that we do it after the final ACK is + * received. The step number here must match the final number in + * afs_deliver_cb_callback(). + */ + if (call->unmarshall == 6) { + ASSERT(call->server && call->count && call->request); + afs_break_callbacks(call->server, call->count, call->request); + } + afs_put_server(call->server); call->server = NULL; kfree(call->buffer); @@ -272,6 +281,16 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, _debug("trailer"); if (skb->len != 0) return -EBADMSG; + + /* Record that the message was unmarshalled successfully so + * that the call destructor can know do the callback breaking + * work, even if the final ACK isn't received. + * + * If the step number changes, then afs_cm_destructor() must be + * updated also. + */ + call->unmarshall++; + case 6: break; } diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index ef943df73b8c..9226a6674d7f 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -183,6 +183,19 @@ static void afs_free_call(struct afs_call *call) kfree(call); } +/* + * End a call + */ +static void afs_end_call(struct afs_call *call) +{ + if (call->rxcall) { + rxrpc_kernel_end_call(call->rxcall); + call->rxcall = NULL; + } + call->type->destructor(call); + afs_free_call(call); +} + /* * allocate a call with flat request and reply buffers */ @@ -383,11 +396,8 @@ error_do_abort: rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT); while ((skb = skb_dequeue(&call->rx_queue))) afs_free_skb(skb); - rxrpc_kernel_end_call(rxcall); - call->rxcall = NULL; error_kill_call: - call->type->destructor(call); - afs_free_call(call); + afs_end_call(call); _leave(" = %d", ret); return ret; } @@ -509,12 +519,8 @@ static void afs_deliver_to_call(struct afs_call *call) if (call->state >= AFS_CALL_COMPLETE) { while ((skb = skb_dequeue(&call->rx_queue))) afs_free_skb(skb); - if (call->incoming) { - rxrpc_kernel_end_call(call->rxcall); - call->rxcall = NULL; - call->type->destructor(call); - afs_free_call(call); - } + if (call->incoming) + afs_end_call(call); } _leave(""); @@ -564,10 +570,7 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) } _debug("call complete"); - rxrpc_kernel_end_call(call->rxcall); - call->rxcall = NULL; - call->type->destructor(call); - afs_free_call(call); + afs_end_call(call); _leave(" = %d", ret); return ret; } @@ -790,10 +793,7 @@ void afs_send_empty_reply(struct afs_call *call) _debug("oom"); rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT); default: - rxrpc_kernel_end_call(call->rxcall); - call->rxcall = NULL; - call->type->destructor(call); - afs_free_call(call); + afs_end_call(call); _leave(" [error]"); return; } @@ -823,17 +823,16 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) call->state = AFS_CALL_AWAIT_ACK; n = rxrpc_kernel_send_data(call->rxcall, &msg, len); if (n >= 0) { + /* Success */ _leave(" [replied]"); return; } + if (n == -ENOMEM) { _debug("oom"); rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT); } - rxrpc_kernel_end_call(call->rxcall); - call->rxcall = NULL; - call->type->destructor(call); - afs_free_call(call); + afs_end_call(call); _leave(" [error]"); } From a1b8ff4c97b4375d21b6d6c45d75877303f61b3b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 20 May 2014 15:55:21 -0400 Subject: [PATCH 142/178] nfsd4: remove lockowner when removing lock stateid The nfsv4 state code has always assumed a one-to-one correspondance between lock stateid's and lockowners even if it appears not to in some places. We may actually change that, but for now when FREE_STATEID releases a lock stateid it also needs to release the parent lockowner. Symptoms were a subsequent LOCK crashing in find_lockowner_str when it calls same_lockowner_ino on a lockowner that unexpectedly has an empty so_stateids list. Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 32b699bebb9c..89e42409b237 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3717,9 +3717,16 @@ out: static __be32 nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp) { - if (check_for_locks(stp->st_file, lockowner(stp->st_stateowner))) + struct nfs4_lockowner *lo = lockowner(stp->st_stateowner); + + if (check_for_locks(stp->st_file, lo)) return nfserr_locks_held; - release_lock_stateid(stp); + /* + * Currently there's a 1-1 lock stateid<->lockowner + * correspondance, and we have to delete the lockowner when we + * delete the lock stateid: + */ + unhash_lockowner(lo); return nfs_ok; } From 27b11428b7de097c42f205beabb1764f4365443b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 8 May 2014 11:19:41 -0400 Subject: [PATCH 143/178] nfsd4: warn on finding lockowner without stateid's The current code assumes a one-to-one lockowner<->lock stateid correspondance. Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 89e42409b237..9a77a5a21557 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4166,6 +4166,10 @@ static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, c if (!same_owner_str(&lo->lo_owner, owner, clid)) return false; + if (list_empty(&lo->lo_owner.so_stateids)) { + WARN_ON_ONCE(1); + return false; + } lst = list_first_entry(&lo->lo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); return lst->st_file->fi_inode == inode; From 8ecc1bad4c9b485ceb47182d282980d0b0fe20ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toralf=20F=C3=B6rster?= Date: Mon, 19 May 2014 10:26:31 +0200 Subject: [PATCH 144/178] sparc64: fix format string mismatch in arch/sparc/kernel/sysfs.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit found by cppcheck Signed-off-by: Toralf Förster Acked-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/kernel/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/kernel/sysfs.c b/arch/sparc/kernel/sysfs.c index a364000ca1aa..7f41d40b7e6e 100644 --- a/arch/sparc/kernel/sysfs.c +++ b/arch/sparc/kernel/sysfs.c @@ -151,7 +151,7 @@ static ssize_t store_mmustat_enable(struct device *s, size_t count) { unsigned long val, err; - int ret = sscanf(buf, "%ld", &val); + int ret = sscanf(buf, "%lu", &val); if (ret != 1) return -EINVAL; From bf63ac73b3e132e6bf0c8798aba7b277c3316e19 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 19 May 2014 12:15:49 -0700 Subject: [PATCH 145/178] net_sched: fix an oops in tcindex filter Kelly reported the following crash: IP: [] tcf_action_exec+0x46/0x90 PGD 3009067 PUD 300c067 PMD 11ff30067 PTE 800000011634b060 Oops: 0000 [#1] SMP DEBUG_PAGEALLOC CPU: 1 PID: 639 Comm: dhclient Not tainted 3.15.0-rc4+ #342 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff8801169ecd00 ti: ffff8800d21b8000 task.ti: ffff8800d21b8000 RIP: 0010:[] [] tcf_action_exec+0x46/0x90 RSP: 0018:ffff8800d21b9b90 EFLAGS: 00010283 RAX: 00000000ffffffff RBX: ffff88011634b8e8 RCX: ffff8800cf7133d8 RDX: ffff88011634b900 RSI: ffff8800cf7133e0 RDI: ffff8800d210f840 RBP: ffff8800d21b9bb0 R08: ffffffff8287bf60 R09: 0000000000000001 R10: ffff8800d2b22b24 R11: 0000000000000001 R12: ffff8800d210f840 R13: ffff8800d21b9c50 R14: ffff8800cf7133e0 R15: ffff8800cad433d8 FS: 00007f49723e1840(0000) GS:ffff88011a800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff88011634b8f0 CR3: 00000000ce469000 CR4: 00000000000006e0 Stack: ffff8800d2170188 ffff8800d210f840 ffff8800d2171b90 0000000000000000 ffff8800d21b9be8 ffffffff817c55bb ffff8800d21b9c50 ffff8800d2171b90 ffff8800d210f840 ffff8800d21b0300 ffff8800d21b9c50 ffff8800d21b9c18 Call Trace: [] tcindex_classify+0x88/0x9b [] tc_classify_compat+0x3e/0x7b [] tc_classify+0x25/0x9f [] htb_enqueue+0x55/0x27a [] dsmark_enqueue+0x165/0x1a4 [] __dev_queue_xmit+0x35e/0x536 [] dev_queue_xmit+0x10/0x12 [] packet_sendmsg+0xb26/0xb9a [] ? __lock_acquire+0x3ae/0xdf3 [] __sock_sendmsg_nosec+0x25/0x27 [] sock_aio_write+0xd0/0xe7 [] do_sync_write+0x59/0x78 [] vfs_write+0xb5/0x10a [] SyS_write+0x49/0x7f [] system_call_fastpath+0x16/0x1b This is because we memcpy struct tcindex_filter_result which contains struct tcf_exts, obviously struct list_head can not be simply copied. This is a regression introduced by commit 33be627159913b094bb578 (net_sched: act: use standard struct list_head). It's not very easy to fix it as the code is a mess: if (old_r) memcpy(&cr, r, sizeof(cr)); else { memset(&cr, 0, sizeof(cr)); tcf_exts_init(&cr.exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); } ... tcf_exts_change(tp, &cr.exts, &e); ... memcpy(r, &cr, sizeof(cr)); the above code should equal to: tcindex_filter_result_init(&cr); if (old_r) cr.res = r->res; ... if (old_r) tcf_exts_change(tp, &r->exts, &e); else tcf_exts_change(tp, &cr.exts, &e); ... r->res = cr.res; after this change, since there is no need to copy struct tcf_exts. And it also fixes other places zero'ing struct's contains struct tcf_exts. Fixes: commit 33be627159913b0 (net_sched: act: use standard struct list_head) Reported-by: Kelly Anderson Tested-by: Kelly Anderson Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_tcindex.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index eed8404443d8..f435a88d899a 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -188,6 +188,12 @@ static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = { [TCA_TCINDEX_CLASSID] = { .type = NLA_U32 }, }; +static void tcindex_filter_result_init(struct tcindex_filter_result *r) +{ + memset(r, 0, sizeof(*r)); + tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); +} + static int tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, u32 handle, struct tcindex_data *p, @@ -207,15 +213,11 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, return err; memcpy(&cp, p, sizeof(cp)); - memset(&new_filter_result, 0, sizeof(new_filter_result)); - tcf_exts_init(&new_filter_result.exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); + tcindex_filter_result_init(&new_filter_result); + tcindex_filter_result_init(&cr); if (old_r) - memcpy(&cr, r, sizeof(cr)); - else { - memset(&cr, 0, sizeof(cr)); - tcf_exts_init(&cr.exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); - } + cr.res = r->res; if (tb[TCA_TCINDEX_HASH]) cp.hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); @@ -267,9 +269,14 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, err = -ENOMEM; if (!cp.perfect && !cp.h) { if (valid_perfect_hash(&cp)) { + int i; + cp.perfect = kcalloc(cp.hash, sizeof(*r), GFP_KERNEL); if (!cp.perfect) goto errout; + for (i = 0; i < cp.hash; i++) + tcf_exts_init(&cp.perfect[i].exts, TCA_TCINDEX_ACT, + TCA_TCINDEX_POLICE); balloc = 1; } else { cp.h = kcalloc(cp.hash, sizeof(f), GFP_KERNEL); @@ -295,14 +302,17 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, tcf_bind_filter(tp, &cr.res, base); } - tcf_exts_change(tp, &cr.exts, &e); + if (old_r) + tcf_exts_change(tp, &r->exts, &e); + else + tcf_exts_change(tp, &cr.exts, &e); tcf_tree_lock(tp); if (old_r && old_r != r) - memset(old_r, 0, sizeof(*old_r)); + tcindex_filter_result_init(old_r); memcpy(p, &cp, sizeof(cp)); - memcpy(r, &cr, sizeof(cr)); + r->res = cr.res; if (r == &new_filter_result) { struct tcindex_filter **fp; From 5a9a55bf9157d3490b0c8c4c81d4708602c26e07 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Wed, 21 May 2014 14:02:35 -0700 Subject: [PATCH 146/178] dma: mv_xor: Flush descriptors before activating a channel We need to use writel() instead of writel_relaxed() when starting a channel, to ensure all the descriptors have been flushed before the activation. While at it, remove the unneeded read-modify-write and make the code simpler. Cc: Signed-off-by: Lior Amsalem Signed-off-by: Ezequiel Garcia Signed-off-by: Dan Williams --- drivers/dma/mv_xor.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 766b68ed505c..394cbc5c93e3 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -191,12 +191,10 @@ static void mv_set_mode(struct mv_xor_chan *chan, static void mv_chan_activate(struct mv_xor_chan *chan) { - u32 activation; - dev_dbg(mv_chan_to_devp(chan), " activate chan.\n"); - activation = readl_relaxed(XOR_ACTIVATION(chan)); - activation |= 0x1; - writel_relaxed(activation, XOR_ACTIVATION(chan)); + + /* writel ensures all descriptors are flushed before activation */ + writel(BIT(0), XOR_ACTIVATION(chan)); } static char mv_chan_is_busy(struct mv_xor_chan *chan) From c1f43dd9c20d85e66c4d77e284f64ac114abe3f8 Mon Sep 17 00:00:00 2001 From: Xuelin Shi Date: Wed, 21 May 2014 14:02:37 -0700 Subject: [PATCH 147/178] dmaengine: fix dmaengine_unmap failure The count which is used to get_unmap_data maybe not the same as the count computed in dmaengine_unmap which causes to free data in a wrong pool. This patch fixes this issue by keeping the map count with unmap_data structure and use this count to get the pool. Cc: Signed-off-by: Xuelin Shi Signed-off-by: Dan Williams --- drivers/dma/dmaengine.c | 2 ++ include/linux/dmaengine.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index a886713937fd..d5d30ed863ce 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -1009,6 +1009,7 @@ static void dmaengine_unmap(struct kref *kref) dma_unmap_page(dev, unmap->addr[i], unmap->len, DMA_BIDIRECTIONAL); } + cnt = unmap->map_cnt; mempool_free(unmap, __get_unmap_pool(cnt)->pool); } @@ -1074,6 +1075,7 @@ dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags) memset(unmap, 0, sizeof(*unmap)); kref_init(&unmap->kref); unmap->dev = dev; + unmap->map_cnt = nr; return unmap; } diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 8300fb87b84a..72cb0ddb9678 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -429,6 +429,7 @@ typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); typedef void (*dma_async_tx_callback)(void *dma_async_param); struct dmaengine_unmap_data { + u8 map_cnt; u8 to_cnt; u8 from_cnt; u8 bidi_cnt; From 4de462ab63e23953fd05da511aeb460ae10cc726 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 19 May 2014 21:56:34 -0700 Subject: [PATCH 148/178] ipv6: gro: fix CHECKSUM_COMPLETE support When GRE support was added in linux-3.14, CHECKSUM_COMPLETE handling broke on GRE+IPv6 because we did not update/use the appropriate csum : GRO layer is supposed to use/update NAPI_GRO_CB(skb)->csum instead of skb->csum Tested using a GRE tunnel and IPv6 traffic. GRO aggregation now happens at the first level (ethernet device) instead of being done in gre tunnel. Native IPv6+TCP is still properly aggregated. Fixes: bf5a755f5e918 ("net-gre-gro: Add GRE support to the GRO stack") Signed-off-by: Eric Dumazet Cc: Jerry Chu Signed-off-by: David S. Miller --- net/ipv6/ip6_offload.c | 6 +----- net/ipv6/tcpv6_offload.c | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 59f95affceb0..b2f091566f88 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -196,7 +196,6 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, unsigned int off; u16 flush = 1; int proto; - __wsum csum; off = skb_gro_offset(skb); hlen = off + sizeof(*iph); @@ -264,13 +263,10 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, NAPI_GRO_CB(skb)->flush |= flush; - csum = skb->csum; - skb_postpull_rcsum(skb, iph, skb_network_header_len(skb)); + skb_gro_postpull_rcsum(skb, iph, nlen); pp = ops->callbacks.gro_receive(head, skb); - skb->csum = csum; - out_unlock: rcu_read_unlock(); diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 0d78132ff18a..8517d3cd1aed 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -42,7 +42,7 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, if (NAPI_GRO_CB(skb)->flush) goto skip_csum; - wsum = skb->csum; + wsum = NAPI_GRO_CB(skb)->csum; switch (skb->ip_summed) { case CHECKSUM_NONE: From 89df20d951b310f3e33422554054ae6e58a3c7a9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 20 May 2014 11:38:18 +0200 Subject: [PATCH 149/178] stmmac: Remove unbalanced clk_disable call The stmmac_open call was calling clk_disable_unprepare on phy init failure, but it never calls clk_prepare_enable, this causes a WARN_ON in the clk framework to trigger if for some reason phy init fails. Signed-off-by: Hans de Goede Acked-by: Giuseppe Cavallaro Acked-by: Chen-Yu Tsai Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d940034acdd4..0f4841d2e8dc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1704,7 +1704,7 @@ static int stmmac_open(struct net_device *dev) if (ret) { pr_err("%s: Cannot attach to PHY (error: %d)\n", __func__, ret); - goto phy_error; + return ret; } } @@ -1779,8 +1779,6 @@ init_error: dma_desc_error: if (priv->phydev) phy_disconnect(priv->phydev); -phy_error: - clk_disable_unprepare(priv->stmmac_clk); return ret; } From b0db5cdf3975134ca967e8b8370a8f0c91ce2329 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 20 May 2014 13:52:13 +0200 Subject: [PATCH 150/178] net: doc: Update references to skb->rxhash In commit 61b905da33 ("net: Rename skb->rxhash to skb->hash"), skb->rxhash was renamed to skb->hash. Update references in Documentation accordingly. Signed-off-by: Tobias Klauser Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- Documentation/networking/filter.txt | 2 +- Documentation/networking/packet_mmap.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 81f940f4e884..e3ba753cb714 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -277,7 +277,7 @@ Possible BPF extensions are shown in the following table: mark skb->mark queue skb->queue_mapping hatype skb->dev->type - rxhash skb->rxhash + rxhash skb->hash cpu raw_smp_processor_id() vlan_tci vlan_tx_tag_get(skb) vlan_pr vlan_tx_tag_present(skb) diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt index 6fea79efb4cb..38112d512f47 100644 --- a/Documentation/networking/packet_mmap.txt +++ b/Documentation/networking/packet_mmap.txt @@ -578,7 +578,7 @@ processes. This also works in combination with mmap(2) on packet sockets. Currently implemented fanout policies are: - - PACKET_FANOUT_HASH: schedule to socket by skb's rxhash + - PACKET_FANOUT_HASH: schedule to socket by skb's packet hash - PACKET_FANOUT_LB: schedule to socket by round-robin - PACKET_FANOUT_CPU: schedule to socket by CPU packet arrives on - PACKET_FANOUT_RND: schedule to socket by random selection From d6b694c0b3f20c3ee54d29d4109fa1978d11a033 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 21 May 2014 11:24:39 -0400 Subject: [PATCH 151/178] bonding: Don't assume 802.1Q when sending alb learning packets. TLB/ALB learning packets always assume 802.1Q vlan protocol, but that is no longer the case since we now have support for Q-in-Q on top of bonding. Pass the vlan protocol to alb_send_lp_vid() so that the packets are properly tagged. CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek Signed-off-by: Vlad Yasevich Acked-by: Veaceslav Falico Signed-off-by: David S. Miller --- drivers/net/bonding/bond_alb.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index e53847884319..2ec945c3b9ba 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -995,7 +995,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id) /*********************** tlb/rlb shared functions *********************/ static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[], - u16 vid) + __be16 vlan_proto, u16 vid) { struct learning_pkt pkt; struct sk_buff *skb; @@ -1021,7 +1021,7 @@ static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[], skb->dev = slave->dev; if (vid) { - skb = vlan_put_tag(skb, htons(ETH_P_8021Q), vid); + skb = vlan_put_tag(skb, vlan_proto, vid); if (!skb) { pr_err("%s: Error: failed to insert VLAN tag\n", slave->bond->dev->name); @@ -1040,13 +1040,14 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]) struct list_head *iter; /* send untagged */ - alb_send_lp_vid(slave, mac_addr, 0); + alb_send_lp_vid(slave, mac_addr, 0, 0); /* loop through vlans and send one packet for each */ rcu_read_lock(); netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) { if (is_vlan_dev(upper) && vlan_get_encap_level(upper) == 0) alb_send_lp_vid(slave, mac_addr, + vlan_dev_vlan_proto(upper), vlan_dev_vlan_id(upper)); } rcu_read_unlock(); From d0c21d43a5a12aaebb1e42e10cf78e6491fc9e5a Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 21 May 2014 13:19:48 -0400 Subject: [PATCH 152/178] bonding: Send ALB learning packets using the right source ALB learning packets are currentlyalways sent using the slave mac address for all vlans configured on top of bond. This is not always correct, as vlans may change their mac address. This patch introduced a concept of strict matching where the source of learning packets can either strictly match the address passed in, or it can determine a more correct address to use. There are 3 casese to consider: 1) Switchover. In this case, we have a new active slave and we need tell the switch about all addresses available on the slave. 2) Monitor. We'll periodically refresh learning info for all slaves. In this case, we refresh all addresses for current active, and just the slave address for other slaves. 3) Teaching of disabled adddress. This happens as part of the failover and in this case, we alwyas to use just the address provided. CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- drivers/net/bonding/bond_alb.c | 49 ++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 2ec945c3b9ba..93580a47cc54 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -82,7 +82,8 @@ static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb) } /* Forward declaration */ -static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]); +static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], + bool strict_match); static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp); static void rlb_src_unlink(struct bonding *bond, u32 index); static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, @@ -459,7 +460,7 @@ static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[]) bond->alb_info.rlb_promisc_timeout_counter = 0; - alb_send_learning_packets(bond->curr_active_slave, addr); + alb_send_learning_packets(bond->curr_active_slave, addr, true); } /* slave being removed should not be active at this point @@ -1032,8 +1033,8 @@ static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[], dev_queue_xmit(skb); } - -static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]) +static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], + bool strict_match) { struct bonding *bond = bond_get_bond_by_slave(slave); struct net_device *upper; @@ -1045,10 +1046,19 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]) /* loop through vlans and send one packet for each */ rcu_read_lock(); netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) { - if (is_vlan_dev(upper) && vlan_get_encap_level(upper) == 0) - alb_send_lp_vid(slave, mac_addr, - vlan_dev_vlan_proto(upper), - vlan_dev_vlan_id(upper)); + if (is_vlan_dev(upper) && vlan_get_encap_level(upper) == 0) { + if (strict_match && + ether_addr_equal_64bits(mac_addr, + upper->dev_addr)) { + alb_send_lp_vid(slave, mac_addr, + vlan_dev_vlan_proto(upper), + vlan_dev_vlan_id(upper)); + } else if (!strict_match) { + alb_send_lp_vid(slave, upper->dev_addr, + vlan_dev_vlan_proto(upper), + vlan_dev_vlan_id(upper)); + } + } } rcu_read_unlock(); } @@ -1108,7 +1118,7 @@ static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1, /* fasten the change in the switch */ if (SLAVE_IS_OK(slave1)) { - alb_send_learning_packets(slave1, slave1->dev->dev_addr); + alb_send_learning_packets(slave1, slave1->dev->dev_addr, false); if (bond->alb_info.rlb_enabled) { /* inform the clients that the mac address * has changed @@ -1120,7 +1130,7 @@ static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1, } if (SLAVE_IS_OK(slave2)) { - alb_send_learning_packets(slave2, slave2->dev->dev_addr); + alb_send_learning_packets(slave2, slave2->dev->dev_addr, false); if (bond->alb_info.rlb_enabled) { /* inform the clients that the mac address * has changed @@ -1491,6 +1501,8 @@ void bond_alb_monitor(struct work_struct *work) /* send learning packets */ if (bond_info->lp_counter >= BOND_ALB_LP_TICKS(bond)) { + bool strict_match; + /* change of curr_active_slave involves swapping of mac addresses. * in order to avoid this swapping from happening while * sending the learning packets, the curr_slave_lock must be held for @@ -1498,8 +1510,15 @@ void bond_alb_monitor(struct work_struct *work) */ read_lock(&bond->curr_slave_lock); - bond_for_each_slave_rcu(bond, slave, iter) - alb_send_learning_packets(slave, slave->dev->dev_addr); + bond_for_each_slave_rcu(bond, slave, iter) { + /* If updating current_active, use all currently + * user mac addreses (!strict_match). Otherwise, only + * use mac of the slave device. + */ + strict_match = (slave != bond->curr_active_slave); + alb_send_learning_packets(slave, slave->dev->dev_addr, + strict_match); + } read_unlock(&bond->curr_slave_lock); @@ -1722,7 +1741,8 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave } else { /* set the new_slave to the bond mac address */ alb_set_slave_mac_addr(new_slave, bond->dev->dev_addr); - alb_send_learning_packets(new_slave, bond->dev->dev_addr); + alb_send_learning_packets(new_slave, bond->dev->dev_addr, + false); } write_lock_bh(&bond->curr_slave_lock); @@ -1765,7 +1785,8 @@ int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr) alb_set_slave_mac_addr(bond->curr_active_slave, bond_dev->dev_addr); read_lock(&bond->lock); - alb_send_learning_packets(bond->curr_active_slave, bond_dev->dev_addr); + alb_send_learning_packets(bond->curr_active_slave, + bond_dev->dev_addr, false); if (bond->alb_info.rlb_enabled) { /* inform clients mac address has changed */ rlb_req_update_slave_clients(bond, bond->curr_active_slave); From fbdc0ad095c0a299e9abf5d8ac8f58374951149a Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 22 May 2014 16:36:55 +0800 Subject: [PATCH 153/178] ipv4: initialise the itag variable in __mkroute_input the value of itag is a random value from stack, and may not be initiated by fib_validate_source, which called fib_combine_itag if CONFIG_IP_ROUTE_CLASSID is not set This will make the cached dst uncertainty Signed-off-by: Li RongQing Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index db1e0da871f4..5e676be3daeb 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1519,7 +1519,7 @@ static int __mkroute_input(struct sk_buff *skb, struct in_device *out_dev; unsigned int flags = 0; bool do_cache; - u32 itag; + u32 itag = 0; /* get a working reference to the output device */ out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); From b6ed5498601df40489606dbc14a9c7011c16630b Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 22 May 2014 11:57:17 -0700 Subject: [PATCH 154/178] batman: fix a bogus warning from batadv_is_on_batman_iface() batman tries to search dev->iflink to check if it's a batman interface, but ->iflink could be 0, which is not a valid ifindex. It should just avoid iflink == 0 case. Reported-by: Jet Chen Tested-by: Jet Chen Cc: David S. Miller Cc: Steffen Klassert Cc: Antonio Quartulli Cc: Marek Lindner Signed-off-by: Cong Wang Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/batman-adv/hard-interface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index b851cc580853..fbda6b54baff 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -83,7 +83,7 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) return true; /* no more parents..stop recursion */ - if (net_dev->iflink == net_dev->ifindex) + if (net_dev->iflink == 0 || net_dev->iflink == net_dev->ifindex) return false; /* recurse over the parent device */ From 6aa6caff30f5dcb9e55b03b9710c30b83750cae5 Mon Sep 17 00:00:00 2001 From: Joe Lawrence Date: Thu, 22 May 2014 17:30:54 -0400 Subject: [PATCH 155/178] [SCSI] scsi_transport_sas: move bsg destructor into sas_rphy_remove The recent change in sysfs, bcdde7e221a8750f9b62b6d0bd31b72ea4ad9309 "sysfs: make __sysfs_remove_dir() recursive" revealed an asymmetric rphy device creation/deletion sequence in scsi_transport_sas: modprobe mpt2sas sas_rphy_add device_add A rphy->dev device_add B sas_device transport class device_add C sas_end_device transport class device_add D bsg class rmmod mpt2sas sas_rphy_delete sas_rphy_remove device_del B device_del C device_del A sysfs_remove_group recursive sysfs dir removal sas_rphy_free device_del D warning where device A is the parent of B, C, and D. When sas_rphy_free tries to unregister the bsg request queue (device D above), the ensuing sysfs cleanup discovers that its sysfs group has already been removed and emits a warning, "sysfs group... not found for kobject 'end_device-X:0'". Since bsg creation is a side effect of sas_rphy_add, move its complementary removal call into sas_rphy_remove. This imposes the following tear-down order for the devices above: D, B, C, A. Note the sas_device and sas_end_device transport class devices (B and C above) are created and destroyed both via the list match traversal in attribute_container_device_trigger, so the order in which they are handled is fixed. This is fine as long as they are deleted before their parent device. Signed-off-by: Joe Lawrence Acked-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/scsi_transport_sas.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 1b681427dde0..c341f855fadc 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -1621,8 +1621,6 @@ void sas_rphy_free(struct sas_rphy *rphy) list_del(&rphy->list); mutex_unlock(&sas_host->lock); - sas_bsg_remove(shost, rphy); - transport_destroy_device(dev); put_device(dev); @@ -1681,6 +1679,7 @@ sas_rphy_remove(struct sas_rphy *rphy) } sas_rphy_unlink(rphy); + sas_bsg_remove(NULL, rphy); transport_remove_device(dev); device_del(dev); } From 6cf12869f5c1a837f18af5f8b2308fa243772735 Mon Sep 17 00:00:00 2001 From: Nathaniel Wesley Filardo Date: Wed, 21 May 2014 16:04:11 +0100 Subject: [PATCH 156/178] AFS: Part of afs_end_call() is identical to code elsewhere, so split it Split afs_end_call() into two pieces, one of which is identical to code in afs_process_async_call(). Replace the latter with a call to the first part of afs_end_call(). Signed-off-by: Nathaniel Wesley Filardo Signed-off-by: David Howells --- fs/afs/rxrpc.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 9226a6674d7f..1a1110b1a7ff 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -184,15 +184,24 @@ static void afs_free_call(struct afs_call *call) } /* - * End a call + * End a call but do not free it */ -static void afs_end_call(struct afs_call *call) +static void afs_end_call_nofree(struct afs_call *call) { if (call->rxcall) { rxrpc_kernel_end_call(call->rxcall); call->rxcall = NULL; } - call->type->destructor(call); + if (call->type->destructor) + call->type->destructor(call); +} + +/* + * End a call and free it + */ +static void afs_end_call(struct afs_call *call) +{ + afs_end_call_nofree(call); afs_free_call(call); } @@ -640,10 +649,7 @@ static void afs_process_async_call(struct work_struct *work) call->reply = NULL; /* kill the call */ - rxrpc_kernel_end_call(call->rxcall); - call->rxcall = NULL; - if (call->type->destructor) - call->type->destructor(call); + afs_end_call_nofree(call); /* we can't just delete the call because the work item may be * queued */ From 150a6b478982475c60fa25b7060ab990ece5483d Mon Sep 17 00:00:00 2001 From: Nathaniel Wesley Filardo Date: Wed, 21 May 2014 14:58:26 +0100 Subject: [PATCH 157/178] AFS: Fix kafs module unloading At present, it is not possible to successfully unload the kafs module if there are outstanding async outgoing calls (those made with afs_make_call()). This appears to be due to the changes introduced by: commit 059499453a9abd1857d442b44da8b4c126dc72a8 Author: Tejun Heo Date: Fri Mar 7 10:24:50 2014 -0500 Subject: afs: don't use PREPARE_WORK which didn't go far enough. The problem is due to: (1) The aforementioned commit introduced a separate handler function pointer in the call, call->async_workfn, in addition to the original workqueue item, call->async_work, for asynchronous operations because workqueues subsystem cannot handle the workqueue item pointer being changed whilst the item is queued or being processed. (2) afs_async_workfn() was introduced in that commit to be the callback for call->async_work. Its sole purpose is to run whatever call->async_workfn points to. (3) call->async_workfn is only used from afs_async_workfn(), which is only set on async_work by afs_collect_incoming_call() - ie. for incoming calls. (4) call->async_workfn is *not* set by afs_make_call() when outgoing calls are made, and call->async_work is set afs_process_async_call() - and not afs_async_workfn(). (5) afs_process_async_call() now changes call->async_workfn rather than call->async_work to point to afs_delete_async_call() to clean up, but this is only effective for incoming calls because call->async_work does not point to afs_async_workfn() for outgoing calls. (6) Because, for incoming calls, call->async_work remains pointing to afs_process_async_call() this results in an infinite loop. Instead, make the workqueue uniformly vector through call->async_workfn, via afs_async_workfn() and simply initialise call->async_workfn to point to afs_process_async_call() in afs_make_call(). Signed-off-by: Nathaniel Wesley Filardo Signed-off-by: David Howells Reviewed-by: Tejun Heo --- fs/afs/rxrpc.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 1a1110b1a7ff..5a05014ea7b0 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -58,6 +58,13 @@ static void afs_collect_incoming_call(struct work_struct *); static struct sk_buff_head afs_incoming_calls; static DECLARE_WORK(afs_collect_incoming_call_work, afs_collect_incoming_call); +static void afs_async_workfn(struct work_struct *work) +{ + struct afs_call *call = container_of(work, struct afs_call, async_work); + + call->async_workfn(work); +} + /* * open an RxRPC socket and bind it to be a server for callback notifications * - the socket is left in blocking mode and non-blocking ops use MSG_DONTWAIT @@ -348,7 +355,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, atomic_read(&afs_outstanding_calls)); call->wait_mode = wait_mode; - INIT_WORK(&call->async_work, afs_process_async_call); + call->async_workfn = afs_process_async_call; + INIT_WORK(&call->async_work, afs_async_workfn); memset(&srx, 0, sizeof(srx)); srx.srx_family = AF_RXRPC; @@ -672,13 +680,6 @@ void afs_transfer_reply(struct afs_call *call, struct sk_buff *skb) call->reply_size += len; } -static void afs_async_workfn(struct work_struct *work) -{ - struct afs_call *call = container_of(work, struct afs_call, async_work); - - call->async_workfn(work); -} - /* * accept the backlog of incoming calls */ From 656f88ddf1ec3abf2cd20b8b4028c44e8e95f56d Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 21 May 2014 15:55:26 +0100 Subject: [PATCH 158/178] AFS: Pass an afs_call* to call->async_workfn() instead of a work_struct* call->async_workfn() can take an afs_call* arg rather than a work_struct* as the functions assigned there are now called from afs_async_workfn() which has to call container_of() anyway. Signed-off-by: David Howells Reviewed-by: Nathaniel Wesley Filardo Reviewed-by: Tejun Heo --- fs/afs/internal.h | 2 +- fs/afs/rxrpc.c | 14 ++++---------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index be75b500005d..590b55f46d61 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -75,7 +75,7 @@ struct afs_call { const struct afs_call_type *type; /* type of call */ const struct afs_wait_mode *wait_mode; /* completion wait mode */ wait_queue_head_t waitq; /* processes awaiting completion */ - work_func_t async_workfn; + void (*async_workfn)(struct afs_call *call); /* asynchronous work function */ struct work_struct async_work; /* asynchronous work processor */ struct work_struct work; /* actual work processor */ struct sk_buff_head rx_queue; /* received packets */ diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 5a05014ea7b0..03a3beb17004 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -25,7 +25,7 @@ static void afs_wake_up_call_waiter(struct afs_call *); static int afs_wait_for_call_to_complete(struct afs_call *); static void afs_wake_up_async_call(struct afs_call *); static int afs_dont_wait_for_call_to_complete(struct afs_call *); -static void afs_process_async_call(struct work_struct *); +static void afs_process_async_call(struct afs_call *); static void afs_rx_interceptor(struct sock *, unsigned long, struct sk_buff *); static int afs_deliver_cm_op_id(struct afs_call *, struct sk_buff *, bool); @@ -62,7 +62,7 @@ static void afs_async_workfn(struct work_struct *work) { struct afs_call *call = container_of(work, struct afs_call, async_work); - call->async_workfn(work); + call->async_workfn(call); } /* @@ -623,11 +623,8 @@ static int afs_dont_wait_for_call_to_complete(struct afs_call *call) /* * delete an asynchronous call */ -static void afs_delete_async_call(struct work_struct *work) +static void afs_delete_async_call(struct afs_call *call) { - struct afs_call *call = - container_of(work, struct afs_call, async_work); - _enter(""); afs_free_call(call); @@ -640,11 +637,8 @@ static void afs_delete_async_call(struct work_struct *work) * - on a multiple-thread workqueue this work item may try to run on several * CPUs at the same time */ -static void afs_process_async_call(struct work_struct *work) +static void afs_process_async_call(struct afs_call *call) { - struct afs_call *call = - container_of(work, struct afs_call, async_work); - _enter(""); if (!skb_queue_empty(&call->rx_queue)) From 9abd09acd664c68f06242da191209d9c70df6953 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 23 May 2014 09:23:51 -0700 Subject: [PATCH 159/178] parisc: 'renameat2()' doesn't need (or have) a separate compat system call The 'renameat2()' system call was incorrectly added as a ENTRY_COMP() in the parisc system call table by commit 18e480aa07f78 ("parisc: add renameat2 syscall"). That causes a link-time error due to there not being any compat version of that system call: arch/parisc/kernel/built-in.o: In function `sys_call_table': (.rodata+0xad0): undefined reference to `compat_sys_renameat2' make: *** [vmlinux] Error 1 Easily fixed by marking the system call as being the same for compat as for native by using ENTRY_SAME() instead of ENTRY_COMP(). Reported-by: Guenter Roeck Acked-by: Miklos Szeredi Acked-by: Helge Deller Signed-off-by: Linus Torvalds --- arch/parisc/kernel/syscall_table.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index f1432da7b4c0..c5fa7a697fba 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -432,7 +432,7 @@ ENTRY_SAME(sched_setattr) ENTRY_SAME(sched_getattr) /* 335 */ ENTRY_COMP(utimes) - ENTRY_COMP(renameat2) + ENTRY_SAME(renameat2) /* Nothing yet */ From b985194c8c0a130ed155b71662e39f7eaea4876f Mon Sep 17 00:00:00 2001 From: Chen Yucong Date: Thu, 22 May 2014 11:54:15 -0700 Subject: [PATCH 160/178] hwpoison, hugetlb: lock_page/unlock_page does not match for handling a free hugepage For handling a free hugepage in memory failure, the race will happen if another thread hwpoisoned this hugepage concurrently. So we need to check PageHWPoison instead of !PageHWPoison. If hwpoison_filter(p) returns true or a race happens, then we need to unlock_page(hpage). Signed-off-by: Chen Yucong Reviewed-by: Naoya Horiguchi Tested-by: Naoya Horiguchi Reviewed-by: Andi Kleen Cc: [2.6.36+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 35ef28acf137..dbf8922216ad 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1081,15 +1081,16 @@ int memory_failure(unsigned long pfn, int trapno, int flags) return 0; } else if (PageHuge(hpage)) { /* - * Check "just unpoisoned", "filter hit", and - * "race with other subpage." + * Check "filter hit" and "race with other subpage." */ lock_page(hpage); - if (!PageHWPoison(hpage) - || (hwpoison_filter(p) && TestClearPageHWPoison(p)) - || (p != hpage && TestSetPageHWPoison(hpage))) { - atomic_long_sub(nr_pages, &num_poisoned_pages); - return 0; + if (PageHWPoison(hpage)) { + if ((hwpoison_filter(p) && TestClearPageHWPoison(p)) + || (p != hpage && TestSetPageHWPoison(hpage))) { + atomic_long_sub(nr_pages, &num_poisoned_pages); + unlock_page(hpage); + return 0; + } } set_page_hwpoison_huge_page(hpage); res = dequeue_hwpoisoned_huge_page(hpage); From 7fcbbaf18392f0b17c95e2f033c8ccf87eecde1d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 22 May 2014 11:54:16 -0700 Subject: [PATCH 161/178] mm/filemap.c: avoid always dirtying mapping->flags on O_DIRECT In some testing I ran today (some fio jobs that spread over two nodes), we end up spending 40% of the time in filemap_check_errors(). That smells fishy. Looking further, this is basically what happens: blkdev_aio_read() generic_file_aio_read() filemap_write_and_wait_range() if (!mapping->nr_pages) filemap_check_errors() and filemap_check_errors() always attempts two test_and_clear_bit() on the mapping flags, thus dirtying it for every single invocation. The patch below tests each of these bits before clearing them, avoiding this issue. In my test case (4-socket box), performance went from 1.7M IOPS to 4.0M IOPS. Signed-off-by: Jens Axboe Acked-by: Jeff Moyer Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/filemap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 000a220e2a41..088358c8006b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -257,9 +257,11 @@ static int filemap_check_errors(struct address_space *mapping) { int ret = 0; /* Check for outstanding write errors */ - if (test_and_clear_bit(AS_ENOSPC, &mapping->flags)) + if (test_bit(AS_ENOSPC, &mapping->flags) && + test_and_clear_bit(AS_ENOSPC, &mapping->flags)) ret = -ENOSPC; - if (test_and_clear_bit(AS_EIO, &mapping->flags)) + if (test_bit(AS_EIO, &mapping->flags) && + test_and_clear_bit(AS_EIO, &mapping->flags)) ret = -EIO; return ret; } From 55231e5c898c5c03c14194001e349f40f59bd300 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 22 May 2014 11:54:17 -0700 Subject: [PATCH 162/178] mm: madvise: fix MADV_WILLNEED on shmem swapouts MADV_WILLNEED currently does not read swapped out shmem pages back in. Commit 0cd6144aadd2 ("mm + fs: prepare for non-page entries in page cache radix trees") made find_get_page() filter exceptional radix tree entries but failed to convert all find_get_page() callers that WANT exceptional entries over to find_get_entry(). One of them is shmem swap readahead in madvise, which now skips over any swap-out records. Convert it to find_get_entry(). Fixes: 0cd6144aadd2 ("mm + fs: prepare for non-page entries in page cache radix trees") Signed-off-by: Johannes Weiner Reported-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/madvise.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/madvise.c b/mm/madvise.c index 539eeb96b323..a402f8fdc68e 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -195,7 +195,7 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma, for (; start < end; start += PAGE_SIZE) { index = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - page = find_get_page(mapping, index); + page = find_get_entry(mapping, index); if (!radix_tree_exceptional_entry(page)) { if (page) page_cache_release(page); From 6f6acb00514c10be35529402f36ad7a288f08c2e Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 22 May 2014 11:54:19 -0700 Subject: [PATCH 163/178] memcg: fix swapcache charge from kernel thread context Commit 284f39afeaa4 ("mm: memcg: push !mm handling out to page cache charge function") explicitly checks for page cache charges without any mm context (from kernel thread context[1]). This seemed to be the only possible case where memory could be charged without mm context so commit 03583f1a631c ("memcg: remove unnecessary !mm check from try_get_mem_cgroup_from_mm()") removed the mm check from get_mem_cgroup_from_mm(). This however caused another NULL ptr dereference during early boot when loopback kernel thread splices to tmpfs as reported by Stephan Kulow: BUG: unable to handle kernel NULL pointer dereference at 0000000000000360 IP: get_mem_cgroup_from_mm.isra.42+0x2b/0x60 Oops: 0000 [#1] SMP Modules linked in: btrfs dm_multipath dm_mod scsi_dh multipath raid10 raid456 async_raid6_recov async_memcpy async_pq raid6_pq async_xor xor async_tx raid1 raid0 md_mod parport_pc parport nls_utf8 isofs usb_storage iscsi_ibft iscsi_boot_sysfs arc4 ecb fan thermal nfs lockd fscache nls_iso8859_1 nls_cp437 sg st hid_generic usbhid af_packet sunrpc sr_mod cdrom ata_generic uhci_hcd virtio_net virtio_blk ehci_hcd usbcore ata_piix floppy processor button usb_common virtio_pci virtio_ring virtio edd squashfs loop ppa] CPU: 0 PID: 97 Comm: loop1 Not tainted 3.15.0-rc5-5-default #1 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 Call Trace: __mem_cgroup_try_charge_swapin+0x40/0xe0 mem_cgroup_charge_file+0x8b/0xd0 shmem_getpage_gfp+0x66b/0x7b0 shmem_file_splice_read+0x18f/0x430 splice_direct_to_actor+0xa2/0x1c0 do_lo_receive+0x5a/0x60 [loop] loop_thread+0x298/0x720 [loop] kthread+0xc6/0xe0 ret_from_fork+0x7c/0xb0 Also Branimir Maksimovic reported the following oops which is tiggered for the swapcache charge path from the accounting code for kernel threads: CPU: 1 PID: 160 Comm: kworker/u8:5 Tainted: P OE 3.15.0-rc5-core2-custom #159 Hardware name: System manufacturer System Product Name/MAXIMUSV GENE, BIOS 1903 08/19/2013 task: ffff880404e349b0 ti: ffff88040486a000 task.ti: ffff88040486a000 RIP: get_mem_cgroup_from_mm.isra.42+0x2b/0x60 Call Trace: __mem_cgroup_try_charge_swapin+0x45/0xf0 mem_cgroup_charge_file+0x9c/0xe0 shmem_getpage_gfp+0x62c/0x770 shmem_write_begin+0x38/0x40 generic_perform_write+0xc5/0x1c0 __generic_file_aio_write+0x1d1/0x3f0 generic_file_aio_write+0x4f/0xc0 do_sync_write+0x5a/0x90 do_acct_process+0x4b1/0x550 acct_process+0x6d/0xa0 do_exit+0x827/0xa70 kthread+0xc3/0xf0 This patch fixes the issue by reintroducing mm check into get_mem_cgroup_from_mm. We could do the same trick in __mem_cgroup_try_charge_swapin as we do for the regular page cache path but it is not worth troubles. The check is not that expensive and it is better to have get_mem_cgroup_from_mm more robust. [1] - http://marc.info/?l=linux-mm&m=139463617808941&w=2 Fixes: 03583f1a631c ("memcg: remove unnecessary !mm check from try_get_mem_cgroup_from_mm()") Reported-and-tested-by: Stephan Kulow Reported-by: Branimir Maksimovic Signed-off-by: Michal Hocko Acked-by: Johannes Weiner Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c47dffdcb246..5177c6d4a2dd 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1077,9 +1077,18 @@ static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm) rcu_read_lock(); do { - memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); - if (unlikely(!memcg)) + /* + * Page cache insertions can happen withou an + * actual mm context, e.g. during disk probing + * on boot, loopback IO, acct() writes etc. + */ + if (unlikely(!mm)) memcg = root_mem_cgroup; + else { + memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); + if (unlikely(!memcg)) + memcg = root_mem_cgroup; + } } while (!css_tryget(&memcg->css)); rcu_read_unlock(); return memcg; @@ -3958,17 +3967,9 @@ int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm, return 0; } - /* - * Page cache insertions can happen without an actual mm - * context, e.g. during disk probing on boot. - */ - if (unlikely(!mm)) - memcg = root_mem_cgroup; - else { - memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true); - if (!memcg) - return -ENOMEM; - } + memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true); + if (!memcg) + return -ENOMEM; __mem_cgroup_commit_charge(memcg, page, 1, type, false); return 0; } From ad0f614e4723db8cead439cf414108cbf975b224 Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Thu, 22 May 2014 11:54:20 -0700 Subject: [PATCH 164/178] wait: swap EXIT_ZOMBIE(Z) and EXIT_DEAD(X) chars in TASK_STATE_TO_CHAR_STR In commit ad86622b478e ("wait: swap EXIT_ZOMBIE and EXIT_DEAD to hide EXIT_TRACE from user-space") the order of task state definitions were changed: EXIT_DEAD and EXIT_ZOMBIE were swapped. Though the charterers for the states in TASK_STATE_TO_CHAR_STR string were not updated. This patch synchronizes the string to the order of definitions. Signed-off-by: Masatake YAMATO Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 25f54c79f757..21fbdae61b9e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -220,7 +220,7 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); #define TASK_PARKED 512 #define TASK_STATE_MAX 1024 -#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP" +#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWP" extern char ___assert_task_state[1 - 2*!!( sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; From 3e030ecc0fc7de10fd0da10c1c19939872a31717 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Thu, 22 May 2014 11:54:21 -0700 Subject: [PATCH 165/178] mm/memory-failure.c: fix memory leak by race between poison and unpoison When a memory error happens on an in-use page or (free and in-use) hugepage, the victim page is isolated with its refcount set to one. When you try to unpoison it later, unpoison_memory() calls put_page() for it twice in order to bring the page back to free page pool (buddy or free hugepage list). However, if another memory error occurs on the page which we are unpoisoning, memory_failure() returns without releasing the refcount which was incremented in the same call at first, which results in memory leak and unconsistent num_poisoned_pages statistics. This patch fixes it. Signed-off-by: Naoya Horiguchi Cc: Andi Kleen Cc: [2.6.32+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index dbf8922216ad..9ccef39a9de2 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1153,6 +1153,8 @@ int memory_failure(unsigned long pfn, int trapno, int flags) */ if (!PageHWPoison(p)) { printk(KERN_ERR "MCE %#lx: just unpoisoned\n", pfn); + atomic_long_sub(nr_pages, &num_poisoned_pages); + put_page(hpage); res = 0; goto out; } From 66db6cfd49825f4e3462ceee3bcbb292d57da6fb Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Thu, 22 May 2014 11:54:22 -0700 Subject: [PATCH 166/178] ocfs2: fix double kmem_cache_destroy in dlm_init In dlm_init, if create dlm_lockname_cache failed in dlm_init_master_caches, it will destroy dlm_lockres_cache which created before twice. And this will cause system die when loading modules. Signed-off-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/dlm/dlmmaster.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index af3f7aa73e13..ee1f88419cb0 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -472,11 +472,15 @@ bail: void dlm_destroy_master_caches(void) { - if (dlm_lockname_cache) + if (dlm_lockname_cache) { kmem_cache_destroy(dlm_lockname_cache); + dlm_lockname_cache = NULL; + } - if (dlm_lockres_cache) + if (dlm_lockres_cache) { kmem_cache_destroy(dlm_lockres_cache); + dlm_lockres_cache = NULL; + } } static void dlm_lockres_release(struct kref *kref) From e60cbeedc48d80689c249ab5dcc3c31ad0452dea Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 22 May 2014 11:54:23 -0700 Subject: [PATCH 167/178] Documentation: fix DOCBOOKS=... building Prior to commit 4266129964b8 ("[media] DocBook: Move all media docbook stuff into its own directory") it was possible to build only a single (or more) book(s) by calling, for example make htmldocs DOCBOOKS=80211.xml This now fails: cp: target `.../Documentation/DocBook//media_api' is not a directory Ignore errors from that copy to make this possible again. Fixes: 4266129964b8 ("[media] DocBook: Move all media docbook stuff into its own directory") Signed-off-by: Johannes Berg Acked-by: Randy Dunlap Cc: Mauro Carvalho Chehab Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/DocBook/media/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/DocBook/media/Makefile b/Documentation/DocBook/media/Makefile index f9fd615427fb..1d27f0a1abd1 100644 --- a/Documentation/DocBook/media/Makefile +++ b/Documentation/DocBook/media/Makefile @@ -195,7 +195,7 @@ DVB_DOCUMENTED = \ # install_media_images = \ - $(Q)cp $(OBJIMGFILES) $(MEDIA_SRC_DIR)/v4l/*.svg $(MEDIA_OBJ_DIR)/media_api + $(Q)-cp $(OBJIMGFILES) $(MEDIA_SRC_DIR)/v4l/*.svg $(MEDIA_OBJ_DIR)/media_api $(MEDIA_OBJ_DIR)/%: $(MEDIA_SRC_DIR)/%.b64 $(Q)base64 -d $< >$@ From 0d9327ab70038ac8c7af6e20456578ab80158f2d Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Thu, 22 May 2014 11:54:24 -0700 Subject: [PATCH 168/178] MAINTAINERS: add closing angle bracket to Vince Bridgers' email address Signed-off-by: Tobias Klauser Cc: Vince Bridgers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6846c7c622e3..c47d2683bd6b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -537,7 +537,7 @@ L: linux-alpha@vger.kernel.org F: arch/alpha/ ALTERA TRIPLE SPEED ETHERNET DRIVER -M: Vince Bridgers L: netdev@vger.kernel.org L: nios2-dev@lists.rocketboards.org (moderated for non-subscribers) S: Maintained From 0132514588f3a0685861221058116a383bff4deb Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 25 May 2014 17:23:07 +0200 Subject: [PATCH 169/178] hwmon: Document temp[1-*]_min_hyst sysfs attribute The temp[1-*]_min_hyst sysfs attribute is already implemented by 3 hwmon drivers (adt7x10, lm77 and lm92) but was missing from the standard interface. Also add temp[1-*]_lcrit_hyst for consistency, even though no driver implement that one for the time being. Signed-off-by: Jean Delvare Reviewed-by: Guenter Roeck --- Documentation/hwmon/sysfs-interface | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Documentation/hwmon/sysfs-interface b/Documentation/hwmon/sysfs-interface index 79f8257dd790..2cc95ad46604 100644 --- a/Documentation/hwmon/sysfs-interface +++ b/Documentation/hwmon/sysfs-interface @@ -327,6 +327,13 @@ temp[1-*]_max_hyst from the max value. RW +temp[1-*]_min_hyst + Temperature hysteresis value for min limit. + Unit: millidegree Celsius + Must be reported as an absolute temperature, NOT a delta + from the min value. + RW + temp[1-*]_input Temperature input value. Unit: millidegree Celsius RO @@ -362,6 +369,13 @@ temp[1-*]_lcrit Temperature critical min value, typically lower than Unit: millidegree Celsius RW +temp[1-*]_lcrit_hyst + Temperature hysteresis value for critical min limit. + Unit: millidegree Celsius + Must be reported as an absolute temperature, NOT a delta + from the critical min value. + RW + temp[1-*]_offset Temperature offset which is added to the temperature reading by the chip. From 59cf4243e557aa64ab2ef51280454aa1f3828e14 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 25 May 2014 17:23:08 +0200 Subject: [PATCH 170/178] hwmon: (ntc_thermistor) Fix dependencies In commit 9e8269de, support was added for ntc_thermistor devices being declared in the device tree and implemented on top of IIO. With that change, a dependency was added to the ntc_thermistor driver: depends on (!OF && !IIO) || (OF && IIO) This construct has the drawback that the driver can no longer be selected when OF is set and IIO isn't, nor when IIO is set and OF is not. This is a regression for the original users of the driver. As the new code depends on IIO and is useless without OF, include it only if both are enabled, and set the dependencies accordingly. This is clearer, more simple and more correct. Signed-off-by: Jean Delvare Fixes: 9e8269de hwmon: (ntc_thermistor) Add DT with IIO support to NTC thermistor driver Reviewed-by: Guenter Roeck Cc: Naveen Krishna Chatradhi Cc: Doug Anderson --- drivers/hwmon/Kconfig | 2 +- drivers/hwmon/ntc_thermistor.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index bc196f49ec53..4af0da96c2e2 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -1053,7 +1053,7 @@ config SENSORS_PC87427 config SENSORS_NTC_THERMISTOR tristate "NTC thermistor support" - depends on (!OF && !IIO) || (OF && IIO) + depends on !OF || IIO=n || IIO help This driver supports NTC thermistors sensor reading and its interpretation. The driver can also monitor the temperature and diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c index 8a17f01e8672..6b4413ce4d39 100644 --- a/drivers/hwmon/ntc_thermistor.c +++ b/drivers/hwmon/ntc_thermistor.c @@ -141,7 +141,7 @@ struct ntc_data { char name[PLATFORM_NAME_SIZE]; }; -#ifdef CONFIG_OF +#if defined(CONFIG_OF) && IS_ENABLED(CONFIG_IIO) static int ntc_adc_iio_read(struct ntc_thermistor_platform_data *pdata) { struct iio_channel *channel = pdata->chan; @@ -223,6 +223,8 @@ ntc_thermistor_parse_dt(struct platform_device *pdev) return NULL; } +#define ntc_match NULL + static void ntc_iio_channel_release(struct ntc_thermistor_platform_data *pdata) { } #endif From ead82d6792ef5c600d535bca6ec50a4da14ff7c7 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 25 May 2014 17:23:08 +0200 Subject: [PATCH 171/178] hwmon: (ntc_thermistor) Fix OF device ID mapping The mapping from OF device IDs to platform device IDs is wrong. TYPE_NCPXXWB473 is 0, TYPE_NCPXXWL333 is 1, so ntc_thermistor_id[TYPE_NCPXXWB473] is { "ncp15wb473", TYPE_NCPXXWB473 } while ntc_thermistor_id[TYPE_NCPXXWL333] is { "ncp18wb473", TYPE_NCPXXWB473 }. So the name is wrong for all but the "ntc,ncp15wb473" entry, and the type is wrong for the "ntc,ncp15wl333" entry. So map the entries by index, it is neither elegant nor robust but at least it is correct. Signed-off-by: Jean Delvare Fixes: 9e8269de hwmon: (ntc_thermistor) Add DT with IIO support to NTC thermistor driver Reviewed-by: Guenter Roeck Cc: Naveen Krishna Chatradhi Cc: Doug Anderson --- drivers/hwmon/ntc_thermistor.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c index 6b4413ce4d39..e76feb86a1d4 100644 --- a/drivers/hwmon/ntc_thermistor.c +++ b/drivers/hwmon/ntc_thermistor.c @@ -44,6 +44,7 @@ struct ntc_compensation { unsigned int ohm; }; +/* Order matters, ntc_match references the entries by index */ static const struct platform_device_id ntc_thermistor_id[] = { { "ncp15wb473", TYPE_NCPXXWB473 }, { "ncp18wb473", TYPE_NCPXXWB473 }, @@ -163,15 +164,15 @@ static int ntc_adc_iio_read(struct ntc_thermistor_platform_data *pdata) static const struct of_device_id ntc_match[] = { { .compatible = "ntc,ncp15wb473", - .data = &ntc_thermistor_id[TYPE_NCPXXWB473] }, + .data = &ntc_thermistor_id[0] }, { .compatible = "ntc,ncp18wb473", - .data = &ntc_thermistor_id[TYPE_NCPXXWB473] }, + .data = &ntc_thermistor_id[1] }, { .compatible = "ntc,ncp21wb473", - .data = &ntc_thermistor_id[TYPE_NCPXXWB473] }, + .data = &ntc_thermistor_id[2] }, { .compatible = "ntc,ncp03wb473", - .data = &ntc_thermistor_id[TYPE_NCPXXWB473] }, + .data = &ntc_thermistor_id[3] }, { .compatible = "ntc,ncp15wl333", - .data = &ntc_thermistor_id[TYPE_NCPXXWL333] }, + .data = &ntc_thermistor_id[4] }, { }, }; MODULE_DEVICE_TABLE(of, ntc_match); From f9b899c44fec53c1519b19f853e96191ae96b1c6 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Thu, 15 May 2014 13:54:06 -0700 Subject: [PATCH 172/178] Documentation: fix typos in drm docbook Fix spelling typo in DocBook/drm.tmpl Signed-off-by: Masanari Iida Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- Documentation/DocBook/drm.tmpl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index 677a02553ec0..ba60d93c1855 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -79,7 +79,7 @@ This first part of the DRM Developer's Guide documents core DRM code, - helper libraries for writting drivers and generic userspace interfaces + helper libraries for writing drivers and generic userspace interfaces exposed by DRM drivers. @@ -459,7 +459,7 @@ char *date; providing a solution to every graphics memory-related problems, GEM identified common code between drivers and created a support library to share it. GEM has simpler initialization and execution requirements than - TTM, but has no video RAM management capabitilies and is thus limited to + TTM, but has no video RAM management capabilities and is thus limited to UMA devices. @@ -889,7 +889,7 @@ int (*prime_fd_to_handle)(struct drm_device *dev, vice versa. Drivers must use the kernel dma-buf buffer sharing framework to manage the PRIME file descriptors. Similar to the mode setting API PRIME is agnostic to the underlying buffer object manager, as - long as handles are 32bit unsinged integers. + long as handles are 32bit unsigned integers. While non-GEM drivers must implement the operations themselves, GEM @@ -2356,7 +2356,7 @@ void intel_crt_init(struct drm_device *dev) first create properties and then create and associate individual instances of those properties to objects. A property can be instantiated multiple times and associated with different objects. Values are stored in property - instances, and all other property information are stored in the propery + instances, and all other property information are stored in the property and shared between all instances of the property. @@ -2697,10 +2697,10 @@ int num_ioctls; Legacy Support Code - The section very brievely covers some of the old legacy support code which + The section very briefly covers some of the old legacy support code which is only used by old DRM drivers which have done a so-called shadow-attach to the underlying device instead of registering as a real driver. This - also includes some of the old generic buffer mangement and command + also includes some of the old generic buffer management and command submission code. Do not use any of this in new and modern drivers. From f9a0974d3f7049f6f753ebd28b8f0907983acf3c Mon Sep 17 00:00:00 2001 From: Paul McQuade Date: Thu, 15 May 2014 13:54:25 -0700 Subject: [PATCH 173/178] Documentation: update thunderbird email client settings Added setting to email-clients that is easier to read and is easier to setup thunderbird. Removed config settings and added GUI settings. Signed-off-by: Paul McQuade Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- Documentation/email-clients.txt | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/Documentation/email-clients.txt b/Documentation/email-clients.txt index e9f5daccbd02..4e30ebaa9e5b 100644 --- a/Documentation/email-clients.txt +++ b/Documentation/email-clients.txt @@ -201,20 +201,15 @@ To beat some sense out of the internal editor, do this: - Edit your Thunderbird config settings so that it won't use format=flowed. Go to "edit->preferences->advanced->config editor" to bring up the - thunderbird's registry editor, and set "mailnews.send_plaintext_flowed" to - "false". + thunderbird's registry editor. -- Disable HTML Format: Set "mail.identity.id1.compose_html" to "false". +- Set "mailnews.send_plaintext_flowed" to "false" -- Enable "preformat" mode: Set "editor.quotesPreformatted" to "true". +- Set "mailnews.wraplength" from "72" to "0" -- Enable UTF8: Set "prefs.converted-to-utf8" to "true". +- "View" > "Message Body As" > "Plain Text" -- Install the "toggle wordwrap" extension. Download the file from: - https://addons.mozilla.org/thunderbird/addon/2351/ - Then go to "tools->add ons", select "install" at the bottom of the screen, - and browse to where you saved the .xul file. This adds an "Enable - Wordwrap" entry under the Options menu of the message composer. +- "View" > "Character Encoding" > "Unicode (UTF-8)" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TkRat (GUI) From f76f133b0c3b182c2d4f4de21a04ba4c964d508d Mon Sep 17 00:00:00 2001 From: Jonathan Callen Date: Thu, 15 May 2014 13:54:52 -0700 Subject: [PATCH 174/178] Documentation: update java sample wrapper for java 7 The sample wrapper currently fails on some Java 7 .class files. This updates the wrapper to properly handle those files. Signed-off-by: Jonathan Callen Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- Documentation/java.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/java.txt b/Documentation/java.txt index e6a723281547..418020584ccc 100644 --- a/Documentation/java.txt +++ b/Documentation/java.txt @@ -188,6 +188,9 @@ shift #define CP_METHODREF 10 #define CP_INTERFACEMETHODREF 11 #define CP_NAMEANDTYPE 12 +#define CP_METHODHANDLE 15 +#define CP_METHODTYPE 16 +#define CP_INVOKEDYNAMIC 18 /* Define some commonly used error messages */ @@ -242,14 +245,19 @@ void skip_constant(FILE *classfile, u_int16_t *cur) break; case CP_CLASS: case CP_STRING: + case CP_METHODTYPE: seekerr = fseek(classfile, 2, SEEK_CUR); break; + case CP_METHODHANDLE: + seekerr = fseek(classfile, 3, SEEK_CUR); + break; case CP_INTEGER: case CP_FLOAT: case CP_FIELDREF: case CP_METHODREF: case CP_INTERFACEMETHODREF: case CP_NAMEANDTYPE: + case CP_INVOKEDYNAMIC: seekerr = fseek(classfile, 4, SEEK_CUR); break; case CP_LONG: From 3568a1dbf174a1ae0022611998c4e5d588b5014d Mon Sep 17 00:00:00 2001 From: Jan Moskyto Matejka Date: Thu, 15 May 2014 13:55:34 -0700 Subject: [PATCH 175/178] Documentation: update /proc/stat "intr" count summary The sum at the beginning of line "intr" includes also unnumbered interrupts. It implies that the sum at the beginning isn't the sum of the remainder of the line, not even an estimation. Fixed the documentation to mention that. This behaviour was added to /proc/stat in commit a2eddfa95919 ("x86: make /proc/stat account for all interrupts") Signed-off-by: Jan Moskyto Matejka Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 8b9cd8eb3f91..264bcde0c51c 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1245,8 +1245,9 @@ second). The meanings of the columns are as follows, from left to right: The "intr" line gives counts of interrupts serviced since boot time, for each of the possible system interrupts. The first column is the total of all -interrupts serviced; each subsequent column is the total for that particular -interrupt. +interrupts serviced including unnumbered architecture specific interrupts; +each subsequent column is the total for that particular numbered interrupt. +Unnumbered interrupts are not shown, only summed into the total. The "ctxt" line gives the total number of context switches across all CPUs. From c7208164e66f63e3ec1759b98087849286410741 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 25 May 2014 16:06:00 -0700 Subject: [PATCH 176/178] Linux 3.15-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9d993787afe0..cf3412d78ff1 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Shuffling Zombie Juror # *DOCUMENTATION* From 72013795a73f8536d3346fa90379c987bcad0cc8 Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Tue, 20 May 2014 20:59:21 +0800 Subject: [PATCH 177/178] ACPI: Add acpi_bus_attach_private_data() to attach data to ACPI handle There is already acpi_bus_get_private_data() to get ACPI handle data which is associated with acpi_bus_private_data_handler(). This patch is to add acpi_bus_attach_private_data() to make a pair and facilitate to attach and get data to/from ACPI handle. Reviewed-by: Mika Westerberg Signed-off-by: Lan Tianyu Signed-off-by: Rafael J. Wysocki --- drivers/acpi/bus.c | 28 ++++++++++++++++++++++++---- include/acpi/acpi_bus.h | 2 ++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index cf925c4f36b7..8445d570f60a 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -132,6 +132,21 @@ void acpi_bus_private_data_handler(acpi_handle handle, } EXPORT_SYMBOL(acpi_bus_private_data_handler); +int acpi_bus_attach_private_data(acpi_handle handle, void *data) +{ + acpi_status status; + + status = acpi_attach_data(handle, + acpi_bus_private_data_handler, data); + if (ACPI_FAILURE(status)) { + acpi_handle_debug(handle, "Error attaching device data\n"); + return -ENODEV; + } + + return 0; +} +EXPORT_SYMBOL_GPL(acpi_bus_attach_private_data); + int acpi_bus_get_private_data(acpi_handle handle, void **data) { acpi_status status; @@ -140,15 +155,20 @@ int acpi_bus_get_private_data(acpi_handle handle, void **data) return -EINVAL; status = acpi_get_data(handle, acpi_bus_private_data_handler, data); - if (ACPI_FAILURE(status) || !*data) { - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No context for object [%p]\n", - handle)); + if (ACPI_FAILURE(status)) { + acpi_handle_debug(handle, "No context for object\n"); return -ENODEV; } return 0; } -EXPORT_SYMBOL(acpi_bus_get_private_data); +EXPORT_SYMBOL_GPL(acpi_bus_get_private_data); + +void acpi_bus_detach_private_data(acpi_handle handle) +{ + acpi_detach_data(handle, acpi_bus_private_data_handler); +} +EXPORT_SYMBOL_GPL(acpi_bus_detach_private_data); void acpi_bus_no_hotplug(acpi_handle handle) { diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 84a2e29a2314..d2006ca31dba 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -406,6 +406,8 @@ extern struct kobject *acpi_kobj; extern int acpi_bus_generate_netlink_event(const char*, const char*, u8, int); void acpi_bus_private_data_handler(acpi_handle, void *); int acpi_bus_get_private_data(acpi_handle, void **); +int acpi_bus_attach_private_data(acpi_handle, void *); +void acpi_bus_detach_private_data(acpi_handle); void acpi_bus_no_hotplug(acpi_handle handle); extern int acpi_notifier_call_chain(struct acpi_device *, u32, u32); extern int register_acpi_notifier(struct notifier_block *); From e6f8a4d60b905eae1a20cbb0c72c67b26b2f02fd Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Tue, 20 May 2014 20:59:22 +0800 Subject: [PATCH 178/178] ACPI / thermal: Use acpi_bus_attach_private_data() to attach private data Use acpi_bus_attach_private_data() to attach private data instead of acpi_attach_data(). Reviewed-by: Mika Westerberg Signed-off-by: Lan Tianyu Signed-off-by: Rafael J. Wysocki --- drivers/acpi/thermal.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index c1e31a41f949..ae195fde858d 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -925,13 +925,10 @@ static int acpi_thermal_register_thermal_zone(struct acpi_thermal *tz) if (result) return result; - status = acpi_attach_data(tz->device->handle, - acpi_bus_private_data_handler, - tz->thermal_zone); - if (ACPI_FAILURE(status)) { - pr_err(PREFIX "Error attaching device data\n"); + status = acpi_bus_attach_private_data(tz->device->handle, + tz->thermal_zone); + if (ACPI_FAILURE(status)) return -ENODEV; - } tz->tz_enabled = 1; @@ -946,7 +943,7 @@ static void acpi_thermal_unregister_thermal_zone(struct acpi_thermal *tz) sysfs_remove_link(&tz->thermal_zone->device.kobj, "device"); thermal_zone_device_unregister(tz->thermal_zone); tz->thermal_zone = NULL; - acpi_detach_data(tz->device->handle, acpi_bus_private_data_handler); + acpi_bus_detach_private_data(tz->device->handle); }