From 17b14ca25e9cd6c5cd7605941f6120e405a84f8b Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 19 Nov 2012 15:37:46 +0100 Subject: [PATCH 001/261] netfilter: ipset: Fix range bug in hash:ip,port,net Due to the missing initialization at adding/deleting entries, when a plain ip,port,net element was the object, multiple elements were added/deleted instead. The bug came from the missing dangling default initialization. The error-prone default initialization is corrected in all hash:* types. --- net/netfilter/ipset/ip_set_hash_ip.c | 4 ++-- net/netfilter/ipset/ip_set_hash_ipport.c | 7 +++---- net/netfilter/ipset/ip_set_hash_ipportip.c | 7 +++---- net/netfilter/ipset/ip_set_hash_ipportnet.c | 7 +++++-- 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index ec3dba5dcd62..5c0b78528e55 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -173,6 +173,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], return adtfn(set, &nip, timeout, flags); } + ip_to = ip; if (tb[IPSET_ATTR_IP_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) @@ -185,8 +186,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], if (!cidr || cidr > 32) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); - } else - ip_to = ip; + } hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1); diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 0171f7502fa5..6283351f4eeb 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -162,7 +162,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport4_elem data = { }; - u32 ip, ip_to = 0, p = 0, port, port_to; + u32 ip, ip_to, p = 0, port, port_to; u32 timeout = h->timeout; bool with_ports = false; int ret; @@ -210,7 +210,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], return ip_set_eexist(ret, flags) ? 0 : ret; } - ip = ntohl(data.ip); + ip_to = ip = ntohl(data.ip); if (tb[IPSET_ATTR_IP_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) @@ -223,8 +223,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], if (!cidr || cidr > 32) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); - } else - ip_to = ip; + } port_to = port = ntohs(data.port); if (with_ports && tb[IPSET_ATTR_PORT_TO]) { diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 6344ef551ec8..6a21271c8d5a 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -166,7 +166,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem data = { }; - u32 ip, ip_to = 0, p = 0, port, port_to; + u32 ip, ip_to, p = 0, port, port_to; u32 timeout = h->timeout; bool with_ports = false; int ret; @@ -218,7 +218,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], return ip_set_eexist(ret, flags) ? 
0 : ret; } - ip = ntohl(data.ip); + ip_to = ip = ntohl(data.ip); if (tb[IPSET_ATTR_IP_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) @@ -231,8 +231,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], if (!cidr || cidr > 32) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); - } else - ip_to = ip; + } port_to = port = ntohs(data.port); if (with_ports && tb[IPSET_ATTR_PORT_TO]) { diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index cb71f9a774e7..2d5cd4ee30eb 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -215,8 +215,8 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem data = { .cidr = HOST_MASK - 1 }; - u32 ip, ip_to = 0, p = 0, port, port_to; - u32 ip2_from = 0, ip2_to, ip2_last, ip2; + u32 ip, ip_to, p = 0, port, port_to; + u32 ip2_from, ip2_to, ip2_last, ip2; u32 timeout = h->timeout; bool with_ports = false; u8 cidr; @@ -286,6 +286,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], return ip_set_eexist(ret, flags) ? 0 : ret; } + ip_to = ip; if (tb[IPSET_ATTR_IP_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) @@ -306,6 +307,8 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (port > port_to) swap(port, port_to); } + + ip2_to = ip2_from; if (tb[IPSET_ATTR_IP2_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to); if (ret) From fa5199648e273a5e3e80aca41c1eb53700438dc1 Mon Sep 17 00:00:00 2001 From: Simon Que Date: Thu, 17 Jan 2013 11:18:20 -0800 Subject: [PATCH 002/261] eCryptfs: initialize payload_len in keystore.c This is meant to remove a compiler warning. It should not make any functional change. payload_len should be initialized when it is passed to write_tag_64_packet() as a pointer. If that call fails, this function should return early, and payload_len won't be used. Signed-off-by: Simon Que Signed-off-by: Tyler Hicks --- fs/ecryptfs/keystore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 2333203a120b..6154cde3a052 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1150,7 +1150,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, struct ecryptfs_message *msg = NULL; char *auth_tok_sig; char *payload; - size_t payload_len; + size_t payload_len = 0; int rc; rc = ecryptfs_get_auth_tok_sig(&auth_tok_sig, auth_tok); From bbcb03e3ce8c76a4ecf0ea7d365f9f0e913fc329 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Thu, 17 Jan 2013 11:36:10 -0800 Subject: [PATCH 003/261] eCryptfs: Fix -Wunused-but-set-variable warnings These two variables are no longer used and can be removed. Fixes warnings when building with W=1. 
Signed-off-by: Tyler Hicks --- fs/ecryptfs/dentry.c | 2 -- fs/ecryptfs/file.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c index 1b5d9af937df..bf12ba5dd223 100644 --- a/fs/ecryptfs/dentry.c +++ b/fs/ecryptfs/dentry.c @@ -45,14 +45,12 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags) { struct dentry *lower_dentry; - struct vfsmount *lower_mnt; int rc = 1; if (flags & LOOKUP_RCU) return -ECHILD; lower_dentry = ecryptfs_dentry_to_lower(dentry); - lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate) goto out; rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags); diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index d45ba4568128..bfa52d2ef460 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -199,7 +199,6 @@ static int ecryptfs_open(struct inode *inode, struct file *file) struct dentry *ecryptfs_dentry = file->f_path.dentry; /* Private value of ecryptfs_dentry allocated in * ecryptfs_lookup() */ - struct dentry *lower_dentry; struct ecryptfs_file_info *file_info; mount_crypt_stat = &ecryptfs_superblock_to_private( @@ -222,7 +221,6 @@ static int ecryptfs_open(struct inode *inode, struct file *file) rc = -ENOMEM; goto out; } - lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat; mutex_lock(&crypt_stat->cs_mutex); if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) { From 111d61a25ec11c1837ac0fd81bf4b845d3327fb7 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Thu, 17 Jan 2013 12:19:35 -0800 Subject: [PATCH 004/261] eCryptfs: Fix -Wmissing-prototypes warnings Mark two inode operation functions as static. Fixes warnings when building with W=1. Signed-off-by: Tyler Hicks --- fs/ecryptfs/inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index cc7709e7c508..ddd961ba2cf9 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -999,8 +999,8 @@ out: return rc; } -int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) { struct ecryptfs_mount_crypt_stat *mount_crypt_stat; int rc = 0; @@ -1021,8 +1021,8 @@ int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry, return rc; } -int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) { struct kstat lower_stat; int rc; From a07c48ad5be5cbf32a2741f10e6fbf4bbfcaa362 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 26 Jan 2013 10:48:42 +0300 Subject: [PATCH 005/261] eCryptfs: remove unneeded checks in virt_to_scatterlist() This is always called with a valid "sg" pointer. My static checker complains because the call to sg_init_table() dereferences "sg" before we reach the checks. 
Signed-off-by: Dan Carpenter Signed-off-by: Tyler Hicks --- fs/ecryptfs/crypto.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index a7b0c2dfb3db..d5c25db4398f 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -301,17 +301,14 @@ int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg, while (size > 0 && i < sg_size) { pg = virt_to_page(addr); offset = offset_in_page(addr); - if (sg) - sg_set_page(&sg[i], pg, 0, offset); + sg_set_page(&sg[i], pg, 0, offset); remainder_of_page = PAGE_CACHE_SIZE - offset; if (size >= remainder_of_page) { - if (sg) - sg[i].length = remainder_of_page; + sg[i].length = remainder_of_page; addr += remainder_of_page; size -= remainder_of_page; } else { - if (sg) - sg[i].length = size; + sg[i].length = size; addr += size; size = 0; } From 3a46741804a4226cd837d1246eed95d4161f7159 Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Tue, 12 Feb 2013 10:56:54 -0700 Subject: [PATCH 006/261] eCryptfs: decrypt_pki_encrypted_session_key(): remove kfree() redundant null check smatch analysis: fs/ecryptfs/keystore.c:1206 decrypt_pki_encrypted_session_key() info: redundant null check on msg calling kfree() Cc: Dustin Kirkland Cc: ecryptfs@vger.kernel.org Signed-off-by: Tim Gardner Signed-off-by: Tyler Hicks --- fs/ecryptfs/keystore.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 6154cde3a052..5aceff202dc0 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1202,8 +1202,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, crypt_stat->key_size); } out: - if (msg) - kfree(msg); + kfree(msg); return rc; } From 1101d58669a92ed9c9f4c7281404fb1e067a1e28 Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Tue, 12 Feb 2013 11:03:49 -0700 Subject: [PATCH 007/261] ecryptfs: ecryptfs_msg_ctx_alloc_to_free(): remove kfree() redundant null check smatch analysis: fs/ecryptfs/messaging.c:101 ecryptfs_msg_ctx_alloc_to_free() info: redundant null check on msg_ctx->msg calling kfree() Cc: Dustin Kirkland Cc: ecryptfs@vger.kernel.org Signed-off-by: Tim Gardner Signed-off-by: Tyler Hicks --- fs/ecryptfs/messaging.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 5fa2471796c2..d5c7297c5816 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -97,8 +97,7 @@ static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx) void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx) { list_move(&(msg_ctx->node), &ecryptfs_msg_ctx_free_list); - if (msg_ctx->msg) - kfree(msg_ctx->msg); + kfree(msg_ctx->msg); msg_ctx->msg = NULL; msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_FREE; } From bbfa57c0f2243a7c31fd248d22e9861a2802cad5 Mon Sep 17 00:00:00 2001 From: Sebastian Riemer Date: Thu, 21 Feb 2013 13:28:09 +1100 Subject: [PATCH 008/261] md: protect against crash upon fsync on ro array If an fsync occurs on a read-only array, we need to send a completion for the IO and may not increment the active IO count. Otherwise, we hit a bug trace and can't stop the MD array anymore. By advice of Christoph Hellwig we return success upon a flush request but we return -EROFS for other writes. We detect flush requests by checking if the bio has zero sectors. This patch is suitable to any -stable kernel to which it applies. 
Cc: Christoph Hellwig Cc: Ben Hutchings Cc: NeilBrown Cc: stable@vger.kernel.org Signed-off-by: Sebastian Riemer Reported-by: Ben Hutchings Acked-by: Paul Menzel Signed-off-by: NeilBrown --- drivers/md/md.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 3db3d1b271f7..1e634a68541e 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -307,6 +307,10 @@ static void md_make_request(struct request_queue *q, struct bio *bio) bio_io_error(bio); return; } + if (mddev->ro == 1 && unlikely(rw == WRITE)) { + bio_endio(bio, bio_sectors(bio) == 0 ? 0 : -EROFS); + return; + } smp_rmb(); /* Ensure implications of 'active' are visible */ rcu_read_lock(); if (mddev->suspended) { From cf1c4a094f46ace5bf00078e2db73491ddf018fe Mon Sep 17 00:00:00 2001 From: Josh Hunt Date: Tue, 19 Feb 2013 11:35:59 -0800 Subject: [PATCH 009/261] netfilter: ipset: timeout values corrupted on set resize If a resize is triggered on a set with timeouts enabled, the timeout values will get corrupted when copying them to the new set. This occurred b/c the wrong timeout value is supplied to type_pf_elem_tadd(). This also adds a simple debug statement similar to the one in type_pf_resize(). Signed-off-by: Josh Hunt Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set_ahash.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/netfilter/ipset/ip_set_ahash.h b/include/linux/netfilter/ipset/ip_set_ahash.h index ef9acd3c8450..01d25e6fc792 100644 --- a/include/linux/netfilter/ipset/ip_set_ahash.h +++ b/include/linux/netfilter/ipset/ip_set_ahash.h @@ -854,6 +854,8 @@ type_pf_tresize(struct ip_set *set, bool retried) retry: ret = 0; htable_bits++; + pr_debug("attempt to resize set %s from %u to %u, t %p\n", + set->name, orig->htable_bits, htable_bits, orig); if (!htable_bits) { /* In case we have plenty of memory :-) */ pr_warning("Cannot increase the hashsize of set %s further\n", @@ -873,7 +875,7 @@ retry: data = ahash_tdata(n, j); m = hbucket(t, HKEY(data, h->initval, htable_bits)); ret = type_pf_elem_tadd(m, data, AHASH_MAX(h), 0, - type_pf_data_timeout(data)); + ip_set_timeout_get(type_pf_data_timeout(data))); if (ret < 0) { read_unlock_bh(&set->lock); ahash_destroy(t); From dd82088dab3646ed28e4aa43d1a5b5d5ffc2afba Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Thu, 21 Feb 2013 11:12:40 +0100 Subject: [PATCH 010/261] netfilter: ipset: "Directory not empty" error message When an entry flagged with "nomatch" was tested by ipset, it returned the error message "Kernel error received: Directory not empty" instead of "<element> is NOT in set <setname>" (reported by John Brendler). The internal error code was not properly transformed before returning to userspace, fixed. Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 6d6d8f2b033e..38ca630eeeb8 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1470,7 +1470,8 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, if (ret == -EAGAIN) ret = 1; - return ret < 0 ? ret : ret > 0 ? 0 : -IPSET_ERR_EXIST; + return (ret < 0 && ret != -ENOTEMPTY) ? ret : ret > 0 ? 
0 : -IPSET_ERR_EXIST; } /* Get headed data of a set */ From e5ab012c3271990e8457055c25cafddc1ae8aa6b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 20 Feb 2013 16:15:36 +0100 Subject: [PATCH 011/261] nohz: Make tick_nohz_irq_exit() irq safe As it stands, irq_exit() may or may not be called with irqs disabled, depending on __ARCH_IRQ_EXIT_IRQS_DISABLED that the arch can define. It makes tick_nohz_irq_exit() unsafe. For example two interrupts can race in tick_nohz_stop_sched_tick(): the innermost one computes the expiring time on top of the timer list, then it's interrupted right before reprogramming the clock. The new interrupt enqueues a new timer list timer, it reprograms the clock to take it into account and it exits. The CPU resumes the innermost interrupt and performs the clock reprogramming without considering the new timer list timer. This regression has been introduced by: 280f06774afedf849f0b34248ed6aff57d0f6908 ("nohz: Separate out irq exit and idle loop dyntick logic") Let's fix it right now with the appropriate protections. A saner long term solution will be to remove __ARCH_IRQ_EXIT_IRQS_DISABLED and mandate that irq_exit() is called with interrupts disabled. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Linus Torvalds Cc: #v3.2+ Link: http://lkml.kernel.org/r/1361373336-11337-1-git-send-email-fweisbec@gmail.com Signed-off-by: Thomas Gleixner --- kernel/time/tick-sched.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 314b9ee07edf..520592ab6aa4 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -565,14 +565,19 @@ void tick_nohz_idle_enter(void) */ void tick_nohz_irq_exit(void) { + unsigned long flags; struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); if (!ts->inidle) return; - /* Cancel the timer because CPU already waken up from the C-states*/ + local_irq_save(flags); + + /* Cancel the timer because CPU already waken up from the C-states */ menu_hrtimer_cancel(); __tick_nohz_idle_enter(ts); + + local_irq_restore(flags); } /** From 74eed0163d0def3fce27228d9ccf3d36e207b286 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 20 Feb 2013 22:00:48 +0100 Subject: [PATCH 012/261] irq: Ensure irq_exit() code runs with interrupts disabled We have already had a few problems with code called from irq_exit() when interrupted from a nesting interrupt. This can happen on architectures which do not define __ARCH_IRQ_EXIT_IRQS_DISABLED. __ARCH_IRQ_EXIT_IRQS_DISABLED should go away and we want to make it mandatory to call irq_exit() with interrupts disabled. As a temporary protection, disable interrupts for those architectures which do not define __ARCH_IRQ_EXIT_IRQS_DISABLED and add a WARN_ONCE when an architecture which defines __ARCH_IRQ_EXIT_IRQS_DISABLED calls irq_exit() with interrupts enabled. Signed-off-by: Thomas Gleixner Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Paul E. 
McKenney Cc: Linus Torvalds Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1302202155320.22263@ionos --- kernel/softirq.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/softirq.c b/kernel/softirq.c index f5cc25f147a6..f2a934673008 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -341,6 +341,14 @@ static inline void invoke_softirq(void) */ void irq_exit(void) { +#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED + unsigned long flags; + + local_irq_save(flags); +#else + WARN_ON_ONCE(!irqs_disabled()); +#endif + account_irq_exit_time(current); trace_hardirq_exit(); sub_preempt_count(IRQ_EXIT_OFFSET); @@ -354,6 +362,9 @@ void irq_exit(void) #endif rcu_irq_exit(); sched_preempt_enable_no_resched(); +#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED + local_irq_restore(flags); +#endif } /* From facd8b80c67a3cf64a467c4a2ac5fb31f2e6745b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Feb 2013 18:17:42 +0100 Subject: [PATCH 013/261] irq: Sanitize invoke_softirq With the irq protection in irq_exit, we can remove the #ifdeffery and the bh_disable/enable dance in invoke_softirq(). Signed-off-by: Thomas Gleixner Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Paul E. McKenney Cc: Linus Torvalds Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1302202155320.22263@ionos --- kernel/softirq.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index f2a934673008..24a921bcf04f 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -322,18 +322,10 @@ void irq_enter(void) static inline void invoke_softirq(void) { - if (!force_irqthreads) { -#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED + if (!force_irqthreads) __do_softirq(); -#else - do_softirq(); -#endif - } else { - __local_bh_disable((unsigned long)__builtin_return_address(0), - SOFTIRQ_OFFSET); + else wakeup_softirqd(); - __local_bh_enable(SOFTIRQ_OFFSET); - } } /* From af7bdbafe3812af406ce07631effd2b96aae2dba Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Feb 2013 18:21:30 +0100 Subject: [PATCH 014/261] Revert "nohz: Make tick_nohz_irq_exit() irq safe" This reverts commit 351429b2e62b6545bb10c756686393f29ba268a1. The extra local_irq_save() is no longer needed as the call site now always calls with interrupts disabled. Signed-off-by: Thomas Gleixner Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Paul E. McKenney Cc: Linus Torvalds --- kernel/time/tick-sched.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 520592ab6aa4..314b9ee07edf 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -565,19 +565,14 @@ void tick_nohz_idle_enter(void) */ void tick_nohz_irq_exit(void) { - unsigned long flags; struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); if (!ts->inidle) return; - local_irq_save(flags); - - /* Cancel the timer because CPU already waken up from the C-states */ + /* Cancel the timer because CPU already waken up from the C-states*/ menu_hrtimer_cancel(); __tick_nohz_idle_enter(ts); - - local_irq_restore(flags); } /** From 4d4c4e24cf48400a24d33feffc7cca4f4e8cabe1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 22 Feb 2013 00:05:07 +0100 Subject: [PATCH 015/261] irq: Remove IRQ_EXIT_OFFSET workaround The IRQ_EXIT_OFFSET trick was used to make sure the irq doesn't get preempted after we subtract the HARDIRQ_OFFSET until we are entirely done with any code in irq_exit(). 
This workaround was necessary because some archs may call irq_exit() with irqs enabled and there is still some code at the end of this function that is not covered by the HARDIRQ_OFFSET but wants to stay non-preemptible. Now that irqs are always disabled in irq_exit(), the whole code is guaranteed not to be preempted. We can thus remove this hack. Signed-off-by: Frederic Weisbecker Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Paul E. McKenney --- include/linux/hardirq.h | 2 -- kernel/softirq.c | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 29eb805ea4a6..c1d6555d2567 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -118,10 +118,8 @@ #ifdef CONFIG_PREEMPT_COUNT # define preemptible() (preempt_count() == 0 && !irqs_disabled()) -# define IRQ_EXIT_OFFSET (HARDIRQ_OFFSET-1) #else # define preemptible() 0 -# define IRQ_EXIT_OFFSET HARDIRQ_OFFSET #endif #if defined(CONFIG_SMP) || defined(CONFIG_GENERIC_HARDIRQS) diff --git a/kernel/softirq.c b/kernel/softirq.c index 24a921bcf04f..f42ff97e1f8f 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -343,7 +343,7 @@ void irq_exit(void) account_irq_exit_time(current); trace_hardirq_exit(); - sub_preempt_count(IRQ_EXIT_OFFSET); + sub_preempt_count(HARDIRQ_OFFSET); if (!in_interrupt() && local_softirq_pending()) invoke_softirq(); @@ -353,7 +353,6 @@ void irq_exit(void) tick_nohz_irq_exit(); #endif rcu_irq_exit(); - sched_preempt_enable_no_resched(); #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED local_irq_restore(flags); #endif From 1b91731d23a3dd8e8d7e3ee21b8c6d6d4cde62c1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 22 Feb 2013 12:55:01 +0100 Subject: [PATCH 016/261] mac80211: fix tim_lock locking The ieee80211_beacon_add_tim() function might be called by drivers with BHs enabled, which causes a potential deadlock if TX happens at the same time and attempts to lock the tim_lock as well. Use spin_lock_bh to fix it. Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 5b9602b62405..5800c7a0d075 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2360,9 +2360,9 @@ static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, if (local->tim_in_locked_section) { __ieee80211_beacon_add_tim(sdata, ps, skb); } else { - spin_lock(&local->tim_lock); + spin_lock_bh(&local->tim_lock); __ieee80211_beacon_add_tim(sdata, ps, skb); - spin_unlock(&local->tim_lock); + spin_unlock_bh(&local->tim_lock); } return 0; From 1c33a0594583059afc983b5ad3c3352849cd5205 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 18 Feb 2013 23:44:38 +0100 Subject: [PATCH 017/261] nl80211: remove radar information The wiphy information is getting very close to being too much for a typical netlink dump message and adding the radar attributes to channels and interface combinations can push it over the limit, which means userspace gets no information whatsoever. Therefore, remove these again for now; no driver actually supports radar detection anyway and a modified userspace is required as well. We're working on a solution that will allow userspace to request splitting the information across multiple netlink messages, which will allow us to add this back. 
Cc: Simon Wunderlich Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 35545ccc30fd..cf4c7947f336 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -554,16 +554,9 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, if ((chan->flags & IEEE80211_CHAN_NO_IBSS) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_IBSS)) goto nla_put_failure; - if (chan->flags & IEEE80211_CHAN_RADAR) { - u32 time = elapsed_jiffies_msecs(chan->dfs_state_entered); - if (nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR)) - goto nla_put_failure; - if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_STATE, - chan->dfs_state)) - goto nla_put_failure; - if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_TIME, time)) - goto nla_put_failure; - } + if ((chan->flags & IEEE80211_CHAN_RADAR) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR)) + goto nla_put_failure; if ((chan->flags & IEEE80211_CHAN_NO_HT40MINUS) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_MINUS)) goto nla_put_failure; @@ -900,9 +893,6 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy, nla_put_u32(msg, NL80211_IFACE_COMB_MAXNUM, c->max_interfaces)) goto nla_put_failure; - if (nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, - c->radar_detect_widths)) - goto nla_put_failure; nla_nest_end(msg, nl_combi); } From 162589f7b162b916ac377753c086e3ba76a9f33d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 20 Feb 2013 00:56:27 +0100 Subject: [PATCH 018/261] nl80211: remove TCP WoWLAN information Just like the radar information, the TCP WoWLAN capability data can increase the wiphy information and make it too big. Remove the TCP WoWLAN information; no driver supports it and new userspace tools will be required as well. 
Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 45 ------------------------------------------ 1 file changed, 45 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index cf4c7947f336..e652d05ff712 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -904,48 +904,6 @@ nla_put_failure: return -ENOBUFS; } -#ifdef CONFIG_PM -static int nl80211_send_wowlan_tcp_caps(struct cfg80211_registered_device *rdev, - struct sk_buff *msg) -{ - const struct wiphy_wowlan_tcp_support *tcp = rdev->wiphy.wowlan.tcp; - struct nlattr *nl_tcp; - - if (!tcp) - return 0; - - nl_tcp = nla_nest_start(msg, NL80211_WOWLAN_TRIG_TCP_CONNECTION); - if (!nl_tcp) - return -ENOBUFS; - - if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, - tcp->data_payload_max)) - return -ENOBUFS; - - if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, - tcp->data_payload_max)) - return -ENOBUFS; - - if (tcp->seq && nla_put_flag(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ)) - return -ENOBUFS; - - if (tcp->tok && nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN, - sizeof(*tcp->tok), tcp->tok)) - return -ENOBUFS; - - if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_INTERVAL, - tcp->data_interval_max)) - return -ENOBUFS; - - if (nla_put_u32(msg, NL80211_WOWLAN_TCP_WAKE_PAYLOAD, - tcp->wake_payload_max)) - return -ENOBUFS; - - nla_nest_end(msg, nl_tcp); - return 0; -} -#endif - static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *dev) { @@ -1320,9 +1278,6 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flag goto nla_put_failure; } - if (nl80211_send_wowlan_tcp_caps(dev, msg)) - goto nla_put_failure; - nla_nest_end(msg, nl_wowlan); } #endif From 163df6cf292a1024277f500038fc0ed493635673 Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Tue, 19 Feb 2013 10:04:50 +0800 Subject: [PATCH 019/261] mac80211: fix the problem of forwarding from DS to DS in Mesh Unicast frame with unknown forwarding information always trigger the path discovery assuming destination is always located inside the MBSS. This patch allows the forwarding to look for mesh gate if path discovery inside the MBSS has failed. Reported-by: Cedric Voncken Signed-off-by: Chun-Yeow Yeoh Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 5800c7a0d075..bb05a0f86034 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1844,9 +1844,24 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, } if (!is_multicast_ether_addr(skb->data)) { + struct sta_info *next_hop; + bool mpp_lookup = true; + mpath = mesh_path_lookup(sdata, skb->data); - if (!mpath) + if (mpath) { + mpp_lookup = false; + next_hop = rcu_dereference(mpath->next_hop); + if (!next_hop || + !(mpath->flags & (MESH_PATH_ACTIVE | + MESH_PATH_RESOLVING))) + mpp_lookup = true; + } + + if (mpp_lookup) mppath = mpp_path_lookup(sdata, skb->data); + + if (mppath && mpath) + mesh_path_del(mpath->sdata, mpath->dst); } /* From c8dc9c654794a765ca61baed07f84ed8aaa7ca8c Mon Sep 17 00:00:00 2001 From: Joe Lawrence Date: Thu, 21 Feb 2013 13:28:09 +1100 Subject: [PATCH 020/261] md: raid1,10: Handle REQ_WRITE_SAME flag in write bios Set mddev queue's max_write_same_sectors to its chunk_sector value (before disk_stack_limits merges the underlying disk limits.) 
With that in place, be sure to handle writes coming down from the block layer that have the REQ_WRITE_SAME flag set. That flag needs to be copied into any newly cloned write bio. Signed-off-by: Joe Lawrence Acked-by: "Martin K. Petersen" Signed-off-by: NeilBrown --- drivers/md/raid1.c | 7 ++++++- drivers/md/raid10.c | 9 +++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index d5bddfc4010e..6e5d5a5f9cb4 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1000,6 +1000,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); const unsigned long do_discard = (bio->bi_rw & (REQ_DISCARD | REQ_SECURE)); + const unsigned long do_same = (bio->bi_rw & REQ_WRITE_SAME); struct md_rdev *blocked_rdev; struct blk_plug_cb *cb; struct raid1_plug_cb *plug = NULL; @@ -1301,7 +1302,8 @@ read_again: conf->mirrors[i].rdev->data_offset); mbio->bi_bdev = conf->mirrors[i].rdev->bdev; mbio->bi_end_io = raid1_end_write_request; - mbio->bi_rw = WRITE | do_flush_fua | do_sync | do_discard; + mbio->bi_rw = + WRITE | do_flush_fua | do_sync | do_discard | do_same; mbio->bi_private = r1_bio; atomic_inc(&r1_bio->remaining); @@ -2818,6 +2820,9 @@ static int run(struct mddev *mddev) if (IS_ERR(conf)) return PTR_ERR(conf); + if (mddev->queue) + blk_queue_max_write_same_sectors(mddev->queue, + mddev->chunk_sectors); rdev_for_each(rdev, mddev) { if (!mddev->gendisk) continue; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 64d48249c03b..1a74c12f0a6e 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1105,6 +1105,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) const unsigned long do_fua = (bio->bi_rw & REQ_FUA); const unsigned long do_discard = (bio->bi_rw & (REQ_DISCARD | REQ_SECURE)); + const unsigned long do_same = (bio->bi_rw & REQ_WRITE_SAME); unsigned long flags; struct md_rdev *blocked_rdev; struct blk_plug_cb *cb; @@ -1460,7 +1461,8 @@ retry_write: rdev)); mbio->bi_bdev = rdev->bdev; mbio->bi_end_io = raid10_end_write_request; - mbio->bi_rw = WRITE | do_sync | do_fua | do_discard; + mbio->bi_rw = + WRITE | do_sync | do_fua | do_discard | do_same; mbio->bi_private = r10_bio; atomic_inc(&r10_bio->remaining); @@ -1502,7 +1504,8 @@ retry_write: r10_bio, rdev)); mbio->bi_bdev = rdev->bdev; mbio->bi_end_io = raid10_end_write_request; - mbio->bi_rw = WRITE | do_sync | do_fua | do_discard; + mbio->bi_rw = + WRITE | do_sync | do_fua | do_discard | do_same; mbio->bi_private = r10_bio; atomic_inc(&r10_bio->remaining); @@ -3569,6 +3572,8 @@ static int run(struct mddev *mddev) if (mddev->queue) { blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors); + blk_queue_max_write_same_sectors(mddev->queue, + mddev->chunk_sectors); blk_queue_io_min(mddev->queue, chunk_size); if (conf->geo.raid_disks % conf->geo.near_copies) blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks); From 4c0ca26bd260dddf3b9781758cb5e2df3f74d4a3 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Thu, 21 Feb 2013 13:28:09 +1100 Subject: [PATCH 021/261] MD RAID10: Minor non-functional code changes Changes include assigning 'addr' from 's' instead of 'sector' to be consistent with the way the code does it just a few lines later and using '%=' vs a conditional and subtraction. 
Signed-off-by: Jonathan Brassow Signed-off-by: NeilBrown --- drivers/md/raid10.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 1a74c12f0a6e..de174ad6f8bd 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -552,14 +552,13 @@ static void __raid10_find_phys(struct geom *geo, struct r10bio *r10bio) for (n = 0; n < geo->near_copies; n++) { int d = dev; sector_t s = sector; - r10bio->devs[slot].addr = sector; r10bio->devs[slot].devnum = d; + r10bio->devs[slot].addr = s; slot++; for (f = 1; f < geo->far_copies; f++) { d += geo->near_copies; - if (d >= geo->raid_disks) - d -= geo->raid_disks; + d %= geo->raid_disks; s += geo->stride; r10bio->devs[slot].devnum = d; r10bio->devs[slot].addr = s; From 475901aff15841fb0a81e7546517407779a9b061 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Thu, 21 Feb 2013 13:28:10 +1100 Subject: [PATCH 022/261] MD RAID10: Improve redundancy for 'far' and 'offset' algorithms (part 1) The MD RAID10 'far' and 'offset' algorithms make copies of entire stripe widths - copying them to a different location on the same devices after shifting the stripe. An example layout of each follows below: "far" algorithm dev1 dev2 dev3 dev4 dev5 dev6 ==== ==== ==== ==== ==== ==== A B C D E F G H I J K L ... F A B C D E --> Copy of stripe0, but shifted by 1 L G H I J K ... "offset" algorithm dev1 dev2 dev3 dev4 dev5 dev6 ==== ==== ==== ==== ==== ==== A B C D E F F A B C D E --> Copy of stripe0, but shifted by 1 G H I J K L L G H I J K ... Redundancy for these algorithms is gained by shifting the copied stripes one device to the right. This patch proposes that array be divided into sets of adjacent devices and when the stripe copies are shifted, they wrap on set boundaries rather than the array size boundary. That is, for the purposes of shifting, the copies are confined to their sets within the array. The sets are 'near_copies * far_copies' in size. The above "far" algorithm example would change to: "far" algorithm dev1 dev2 dev3 dev4 dev5 dev6 ==== ==== ==== ==== ==== ==== A B C D E F G H I J K L ... B A D C F E --> Copy of stripe0, shifted 1, 2-dev sets H G J I L K Dev sets are 1-2, 3-4, 5-6 ... This has the affect of improving the redundancy of the array. We can always sustain at least one failure, but sometimes more than one can be handled. In the first examples, the pairs of devices that CANNOT fail together are: (1,2) (2,3) (3,4) (4,5) (5,6) (1, 6) [40% of possible pairs] In the example where the copies are confined to sets, the pairs of devices that cannot fail together are: (1,2) (3,4) (5,6) [20% of possible pairs] We cannot simply replace the old algorithms, so the 17th bit of the 'layout' variable is used to indicate whether we use the old or new method of computing the shift. (This is similar to the way the 16th bit indicates whether the "far" algorithm or the "offset" algorithm is being used.) This patch only handles the cases where the number of total raid disks is a multiple of 'far_copies'. A follow-on patch addresses the condition where this is not true. 
Signed-off-by: Jonathan Brassow Signed-off-by: NeilBrown --- drivers/md/raid10.c | 58 +++++++++++++++++++++++++++++++-------------- drivers/md/raid10.h | 5 ++++ 2 files changed, 45 insertions(+), 18 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index de174ad6f8bd..70b58b4bcf89 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -38,21 +38,36 @@ * near_copies (stored in low byte of layout) * far_copies (stored in second byte of layout) * far_offset (stored in bit 16 of layout ) + * use_far_sets (stored in bit 17 of layout ) * - * The data to be stored is divided into chunks using chunksize. - * Each device is divided into far_copies sections. - * In each section, chunks are laid out in a style similar to raid0, but - * near_copies copies of each chunk is stored (each on a different drive). - * The starting device for each section is offset near_copies from the starting - * device of the previous section. - * Thus they are (near_copies*far_copies) of each chunk, and each is on a different - * drive. - * near_copies and far_copies must be at least one, and their product is at most - * raid_disks. + * The data to be stored is divided into chunks using chunksize. Each device + * is divided into far_copies sections. In each section, chunks are laid out + * in a style similar to raid0, but near_copies copies of each chunk is stored + * (each on a different drive). The starting device for each section is offset + * near_copies from the starting device of the previous section. Thus there + * are (near_copies * far_copies) of each chunk, and each is on a different + * drive. near_copies and far_copies must be at least one, and their product + * is at most raid_disks. * * If far_offset is true, then the far_copies are handled a bit differently. - * The copies are still in different stripes, but instead of be very far apart - * on disk, there are adjacent stripes. + * The copies are still in different stripes, but instead of being very far + * apart on disk, there are adjacent stripes. + * + * The far and offset algorithms are handled slightly differently if + * 'use_far_sets' is true. In this case, the array's devices are grouped into + * sets that are (near_copies * far_copies) in size. The far copied stripes + * are still shifted by 'near_copies' devices, but this shifting stays confined + * to the set rather than the entire array. This is done to improve the number + * of device combinations that can fail without causing the array to fail. + * Example 'far' algorithm w/o 'use_far_sets' (each letter represents a chunk + * on a device): + * A B C D A B C D E + * ... ... + * D A B C E A B C D + * Example 'far' algorithm w/ 'use_far_sets' enabled (sets illustrated w/ []'s): + * [A B] [C D] [A B] [C D E] + * |...| |...| |...| | ... 
| + * [B A] [D C] [B A] [E C D] */ /* @@ -551,14 +566,18 @@ static void __raid10_find_phys(struct geom *geo, struct r10bio *r10bio) /* and calculate all the others */ for (n = 0; n < geo->near_copies; n++) { int d = dev; + int set; sector_t s = sector; r10bio->devs[slot].devnum = d; r10bio->devs[slot].addr = s; slot++; for (f = 1; f < geo->far_copies; f++) { + set = d / geo->far_set_size; d += geo->near_copies; - d %= geo->raid_disks; + d %= geo->far_set_size; + d += geo->far_set_size * set; + s += geo->stride; r10bio->devs[slot].devnum = d; r10bio->devs[slot].addr = s; @@ -594,6 +613,8 @@ static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev) * or recovery, so reshape isn't happening */ struct geom *geo = &conf->geo; + int far_set_start = (dev / geo->far_set_size) * geo->far_set_size; + int far_set_size = geo->far_set_size; offset = sector & geo->chunk_mask; if (geo->far_offset) { @@ -601,13 +622,13 @@ static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev) chunk = sector >> geo->chunk_shift; fc = sector_div(chunk, geo->far_copies); dev -= fc * geo->near_copies; - if (dev < 0) - dev += geo->raid_disks; + if (dev < far_set_start) + dev += far_set_size; } else { while (sector >= geo->stride) { sector -= geo->stride; - if (dev < geo->near_copies) - dev += geo->raid_disks - geo->near_copies; + if (dev < (geo->near_copies + far_set_start)) + dev += far_set_size - geo->near_copies; else dev -= geo->near_copies; } @@ -3438,7 +3459,7 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new) disks = mddev->raid_disks + mddev->delta_disks; break; } - if (layout >> 17) + if (layout >> 18) return -1; if (chunk < (PAGE_SIZE >> 9) || !is_power_of_2(chunk)) @@ -3450,6 +3471,7 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new) geo->near_copies = nc; geo->far_copies = fc; geo->far_offset = fo; + geo->far_set_size = (layout & (1<<17)) ? disks / fc : disks; geo->chunk_mask = chunk - 1; geo->chunk_shift = ffz(~chunk); return nc*fc; diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 1054cf602345..157d69e83ff4 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h @@ -33,6 +33,11 @@ struct r10conf { * far_offset, in which case it is * 1 stripe. */ + int far_set_size; /* The number of devices in a set, + * where a 'set' are devices that + * contain far/offset copies of + * each other. + */ int chunk_shift; /* shift from chunks to sectors */ sector_t chunk_mask; } prev, geo; From 9a3152ab024867100f2f50d124b998d05fb1c3f6 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Thu, 21 Feb 2013 13:28:10 +1100 Subject: [PATCH 023/261] MD RAID10: Improve redundancy for 'far' and 'offset' algorithms (part 2) MD RAID10: Improve redundancy for 'far' and 'offset' algorithms (part 2) This patch addresses raid arrays that have a number of devices that cannot be evenly divided by 'far_copies'. (E.g. 5 devices, far_copies = 2) This case must be handled differently because it causes that last set to be of a different size than the rest of the sets. We must compute a new modulo for this last set so that copied chunks are properly wrapped around. Example use_far_sets=1, far_copies=2, near_copies=1, devices=5: "far" algorithm dev1 dev2 dev3 dev4 dev5 ==== ==== ==== ==== ==== [ A B ] [ C D E ] [ G H ] [ I J K ] ... 
[ B A ] [ E C D ] --> nominal set of 2 and last set of 3 [ H G ] [ K I J ] []'s show far/offset sets Signed-off-by: Jonathan Brassow Signed-off-by: NeilBrown --- drivers/md/raid10.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 70b58b4bcf89..61ed150bd0cf 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -550,6 +550,13 @@ static void __raid10_find_phys(struct geom *geo, struct r10bio *r10bio) sector_t stripe; int dev; int slot = 0; + int last_far_set_start, last_far_set_size; + + last_far_set_start = (geo->raid_disks / geo->far_set_size) - 1; + last_far_set_start *= geo->far_set_size; + + last_far_set_size = geo->far_set_size; + last_far_set_size += (geo->raid_disks % geo->far_set_size); /* now calculate first sector/dev */ chunk = r10bio->sector >> geo->chunk_shift; @@ -575,9 +582,16 @@ static void __raid10_find_phys(struct geom *geo, struct r10bio *r10bio) for (f = 1; f < geo->far_copies; f++) { set = d / geo->far_set_size; d += geo->near_copies; - d %= geo->far_set_size; - d += geo->far_set_size * set; + if ((geo->raid_disks % geo->far_set_size) && + (d > last_far_set_start)) { + d -= last_far_set_start; + d %= last_far_set_size; + d += last_far_set_start; + } else { + d %= geo->far_set_size; + d += geo->far_set_size * set; + } s += geo->stride; r10bio->devs[slot].devnum = d; r10bio->devs[slot].addr = s; @@ -615,6 +629,18 @@ static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev) struct geom *geo = &conf->geo; int far_set_start = (dev / geo->far_set_size) * geo->far_set_size; int far_set_size = geo->far_set_size; + int last_far_set_start; + + if (geo->raid_disks % geo->far_set_size) { + last_far_set_start = (geo->raid_disks / geo->far_set_size) - 1; + last_far_set_start *= geo->far_set_size; + + if (dev >= last_far_set_start) { + far_set_size = geo->far_set_size; + far_set_size += (geo->raid_disks % geo->far_set_size); + far_set_start = last_far_set_start; + } + } offset = sector & geo->chunk_mask; if (geo->far_offset) { From fe5d2f4a15967bbe907e7b3e31e49dae7af7cc6b Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Thu, 21 Feb 2013 13:28:10 +1100 Subject: [PATCH 024/261] DM RAID: Add support for MD's RAID10 "far" and "offset" algorithms DM RAID: Add support for MD's RAID10 "far" and "offset" algorithms Until now, dm-raid.c only supported the "near" algorithm of MD's RAID10 implementation. This patch adds support for the "far" and "offset" algorithms, but only with the improved redundancy that is brought with the introduction of the 'use_far_sets' bit, which shifts copied stripes according to smaller sets vs the entire array. That is, the 17th bit of the 'layout' variable that defines the RAID10 implementation will always be set. (More information on how the 'layout' variable selects the RAID10 algorithm can be found in the opening comments of drivers/md/raid10.c.) 
Signed-off-by: Jonathan Brassow Signed-off-by: NeilBrown --- Documentation/device-mapper/dm-raid.txt | 44 +++++++-- drivers/md/dm-raid.c | 123 ++++++++++++++++++++---- 2 files changed, 140 insertions(+), 27 deletions(-) diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt index 56fb62b09fc5..b428556197c9 100644 --- a/Documentation/device-mapper/dm-raid.txt +++ b/Documentation/device-mapper/dm-raid.txt @@ -30,6 +30,7 @@ The target is named "raid" and it accepts the following parameters: raid10 Various RAID10 inspired algorithms chosen by additional params - RAID10: Striped Mirrors (aka 'Striping on top of mirrors') - RAID1E: Integrated Adjacent Stripe Mirroring + - RAID1E: Integrated Offset Stripe Mirroring - and other similar RAID10 variants Reference: Chapter 4 of @@ -64,15 +65,15 @@ The target is named "raid" and it accepts the following parameters: synchronisation state for each region. [raid10_copies <# copies>] - [raid10_format near] + [raid10_format ] These two options are used to alter the default layout of a RAID10 configuration. The number of copies is can be - specified, but the default is 2. There are other variations - to how the copies are laid down - the default and only current - option is "near". Near copies are what most people think of - with respect to mirroring. If these options are left - unspecified, or 'raid10_copies 2' and/or 'raid10_format near' - are given, then the layouts for 2, 3 and 4 devices are: + specified, but the default is 2. There are also three + variations to how the copies are laid down - the default + is "near". Near copies are what most people think of with + respect to mirroring. If these options are left unspecified, + or 'raid10_copies 2' and/or 'raid10_format near' are given, + then the layouts for 2, 3 and 4 devices are: 2 drives 3 drives 4 drives -------- ---------- -------------- A1 A1 A1 A1 A2 A1 A1 A2 A2 @@ -85,6 +86,33 @@ The target is named "raid" and it accepts the following parameters: 3-device layout is what might be called a 'RAID1E - Integrated Adjacent Stripe Mirroring'. + If 'raid10_copies 2' and 'raid10_format far', then the layouts + for 2, 3 and 4 devices are: + 2 drives 3 drives 4 drives + -------- -------------- -------------------- + A1 A2 A1 A2 A3 A1 A2 A3 A4 + A3 A4 A4 A5 A6 A5 A6 A7 A8 + A5 A6 A7 A8 A9 A9 A10 A11 A12 + .. .. .. .. .. .. .. .. .. + A2 A1 A3 A1 A2 A2 A1 A4 A3 + A4 A3 A6 A4 A5 A6 A5 A8 A7 + A6 A5 A9 A7 A8 A10 A9 A12 A11 + .. .. .. .. .. .. .. .. .. + + If 'raid10_copies 2' and 'raid10_format offset', then the + layouts for 2, 3 and 4 devices are: + 2 drives 3 drives 4 drives + -------- ------------ ----------------- + A1 A2 A1 A2 A3 A1 A2 A3 A4 + A2 A1 A3 A1 A2 A2 A1 A4 A3 + A3 A4 A4 A5 A6 A5 A6 A7 A8 + A4 A3 A6 A4 A5 A6 A5 A8 A7 + A5 A6 A7 A8 A9 A9 A10 A11 A12 + A6 A5 A9 A7 A8 A10 A9 A12 A11 + .. .. .. .. .. .. .. .. .. + Here we see layouts closely akin to 'RAID1E - Integrated + Offset Stripe Mirroring'. + <#raid_devs>: The number of devices composing the array. Each device consists of two entries. The first is the device containing the metadata (if any); the second is the one containing the @@ -142,3 +170,5 @@ Version History 1.3.0 Added support for RAID 10 1.3.1 Allow device replacement/rebuild for RAID 10 1.3.2 Fix/improve redundancy checking for RAID10 +1.4.0 Non-functional change. Removes arg from mapping function. +1.4.1 Add RAID10 "far" and "offset" algorithm support. 
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 9e58dbd8d8cb..22fd55993723 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -91,15 +91,44 @@ static struct raid_type { {"raid6_nc", "RAID6 (N continue)", 2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE} }; +static char *raid10_md_layout_to_format(int layout) +{ + /* + * Bit 16 and 17 stand for "offset" and "use_far_sets" + * Refer to MD's raid10.c for details + */ + if ((layout & 0x10000) && (layout & 0x20000)) + return "offset"; + + if ((layout & 0xFF) > 1) + return "near"; + + return "far"; +} + static unsigned raid10_md_layout_to_copies(int layout) { - return layout & 0xFF; + if ((layout & 0xFF) > 1) + return layout & 0xFF; + return (layout >> 8) & 0xFF; } static int raid10_format_to_md_layout(char *format, unsigned copies) { - /* 1 "far" copy, and 'copies' "near" copies */ - return (1 << 8) | (copies & 0xFF); + unsigned n = 1, f = 1; + + if (!strcmp("near", format)) + n = copies; + else + f = copies; + + if (!strcmp("offset", format)) + return 0x30000 | (f << 8) | n; + + if (!strcmp("far", format)) + return 0x20000 | (f << 8) | n; + + return (f << 8) | n; } static struct raid_type *get_raid_type(char *name) @@ -352,6 +381,7 @@ static int validate_raid_redundancy(struct raid_set *rs) { unsigned i, rebuild_cnt = 0; unsigned rebuilds_per_group, copies, d; + unsigned group_size, last_group_start; for (i = 0; i < rs->md.raid_disks; i++) if (!test_bit(In_sync, &rs->dev[i].rdev.flags) || @@ -379,9 +409,6 @@ static int validate_raid_redundancy(struct raid_set *rs) * as long as the failed devices occur in different mirror * groups (i.e. different stripes). * - * Right now, we only allow for "near" copies. When other - * formats are added, we will have to check those too. - * * When checking "near" format, make sure no adjacent devices * have failed beyond what can be handled. In addition to the * simple case where the number of devices is a multiple of the @@ -391,14 +418,41 @@ static int validate_raid_redundancy(struct raid_set *rs) * A A B B C * C D D E E */ - for (i = 0; i < rs->md.raid_disks * copies; i++) { - if (!(i % copies)) + if (!strcmp("near", raid10_md_layout_to_format(rs->md.layout))) { + for (i = 0; i < rs->md.raid_disks * copies; i++) { + if (!(i % copies)) + rebuilds_per_group = 0; + d = i % rs->md.raid_disks; + if ((!rs->dev[d].rdev.sb_page || + !test_bit(In_sync, &rs->dev[d].rdev.flags)) && + (++rebuilds_per_group >= copies)) + goto too_many; + } + break; + } + + /* + * When checking "far" and "offset" formats, we need to ensure + * that the device that holds its copy is not also dead or + * being rebuilt. (Note that "far" and "offset" formats only + * support two copies right now. These formats also only ever + * use the 'use_far_sets' variant.) + * + * This check is somewhat complicated by the need to account + * for arrays that are not a multiple of (far) copies. This + * results in the need to treat the last (potentially larger) + * set differently. 
+ */ + group_size = (rs->md.raid_disks / copies); + last_group_start = (rs->md.raid_disks / group_size) - 1; + last_group_start *= group_size; + for (i = 0; i < rs->md.raid_disks; i++) { + if (!(i % copies) && !(i > last_group_start)) rebuilds_per_group = 0; - d = i % rs->md.raid_disks; - if ((!rs->dev[d].rdev.sb_page || - !test_bit(In_sync, &rs->dev[d].rdev.flags)) && + if ((!rs->dev[i].rdev.sb_page || + !test_bit(In_sync, &rs->dev[i].rdev.flags)) && (++rebuilds_per_group >= copies)) - goto too_many; + goto too_many; } break; default: @@ -433,7 +487,7 @@ too_many: * * RAID10-only options: * [raid10_copies <# copies>] Number of copies. (Default: 2) - * [raid10_format ] Layout algorithm. (Default: near) + * [raid10_format ] Layout algorithm. (Default: near) */ static int parse_raid_params(struct raid_set *rs, char **argv, unsigned num_raid_params) @@ -520,7 +574,9 @@ static int parse_raid_params(struct raid_set *rs, char **argv, rs->ti->error = "'raid10_format' is an invalid parameter for this RAID type"; return -EINVAL; } - if (strcmp("near", argv[i])) { + if (strcmp("near", argv[i]) && + strcmp("far", argv[i]) && + strcmp("offset", argv[i])) { rs->ti->error = "Invalid 'raid10_format' value given"; return -EINVAL; } @@ -644,6 +700,15 @@ static int parse_raid_params(struct raid_set *rs, char **argv, return -EINVAL; } + /* + * If the format is not "near", we only support + * two copies at the moment. + */ + if (strcmp("near", raid10_format) && (raid10_copies > 2)) { + rs->ti->error = "Too many copies for given RAID10 format."; + return -EINVAL; + } + /* (Len * #mirrors) / #devices */ sectors_per_dev = rs->ti->len * raid10_copies; sector_div(sectors_per_dev, rs->md.raid_disks); @@ -854,17 +919,30 @@ static int super_init_validation(struct mddev *mddev, struct md_rdev *rdev) /* * Reshaping is not currently allowed */ - if ((le32_to_cpu(sb->level) != mddev->level) || - (le32_to_cpu(sb->layout) != mddev->layout) || - (le32_to_cpu(sb->stripe_sectors) != mddev->chunk_sectors)) { - DMERR("Reshaping arrays not yet supported."); + if (le32_to_cpu(sb->level) != mddev->level) { + DMERR("Reshaping arrays not yet supported. (RAID level change)"); + return -EINVAL; + } + if (le32_to_cpu(sb->layout) != mddev->layout) { + DMERR("Reshaping arrays not yet supported. (RAID layout change)"); + DMERR(" 0x%X vs 0x%X", le32_to_cpu(sb->layout), mddev->layout); + DMERR(" Old layout: %s w/ %d copies", + raid10_md_layout_to_format(le32_to_cpu(sb->layout)), + raid10_md_layout_to_copies(le32_to_cpu(sb->layout))); + DMERR(" New layout: %s w/ %d copies", + raid10_md_layout_to_format(mddev->layout), + raid10_md_layout_to_copies(mddev->layout)); + return -EINVAL; + } + if (le32_to_cpu(sb->stripe_sectors) != mddev->chunk_sectors) { + DMERR("Reshaping arrays not yet supported. (stripe sectors change)"); return -EINVAL; } /* We can only change the number of devices in RAID1 right now */ if ((rs->raid_type->level != 1) && (le32_to_cpu(sb->num_devices) != mddev->raid_disks)) { - DMERR("Reshaping arrays not yet supported."); + DMERR("Reshaping arrays not yet supported. 
(device count change)"); return -EINVAL; } @@ -1329,7 +1407,8 @@ static int raid_status(struct dm_target *ti, status_type_t type, raid10_md_layout_to_copies(rs->md.layout)); if (rs->print_flags & DMPF_RAID10_FORMAT) - DMEMIT(" raid10_format near"); + DMEMIT(" raid10_format %s", + raid10_md_layout_to_format(rs->md.layout)); DMEMIT(" %d", rs->md.raid_disks); for (i = 0; i < rs->md.raid_disks; i++) { @@ -1420,6 +1499,10 @@ static struct target_type raid_target = { static int __init dm_raid_init(void) { + DMINFO("Loading target version %u.%u.%u", + raid_target.version[0], + raid_target.version[1], + raid_target.version[2]); return dm_register_target(&raid_target); } From a64685399181780998281fe07309a94b25dd24c3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 21 Feb 2013 14:33:17 +1100 Subject: [PATCH 025/261] md: fix two bugs when attempting to resize RAID0 array. You cannot resize a RAID0 array (in terms of making the devices bigger), but the code doesn't entirely stop you. So: disable setting of the available size on each device for RAID0 and Linear devices. This must not change as doing so can change the effective layout of data. Make sure that the size that raid0_size() reports is accurate, but rounding devices sizes to chunk sizes. As the device sizes cannot change now, this isn't so important, but it is best to be safe. Without this change: mdadm --grow /dev/md0 -z max mdadm --grow /dev/md0 -Z max then read to the end of the array can cause a BUG in a RAID0 array. These bugs have been present ever since it became possible to resize any device, which is a long time. So the fix is suitable for any -stable kerenl. Cc: stable@vger.kernel.org Signed-off-by: NeilBrown --- drivers/md/md.c | 3 +++ drivers/md/raid0.c | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 1e634a68541e..f363135144f6 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2998,6 +2998,9 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len) } else if (!sectors) sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) - rdev->data_offset; + if (!my_mddev->pers->resize) + /* Cannot change size for RAID0 or Linear etc */ + return -EINVAL; } if (sectors < my_mddev->dev_sectors) return -EINVAL; /* component must fit device */ diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 24b359717a7e..15c8d3505450 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -411,7 +411,8 @@ static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks "%s does not support generic reshape\n", __func__); rdev_for_each(rdev, mddev) - array_sectors += rdev->sectors; + array_sectors += (rdev->sectors & + ~(sector_t)(mddev->chunk_sectors-1)); return array_sectors; } From 58ebb34c49fcfcaa029e4b1c1453d92583900f9a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 21 Feb 2013 15:36:38 +1100 Subject: [PATCH 026/261] md: raid0: fix error return from create_stripe_zones. Create_stripe_zones returns an error slightly differently to raid0_run and to raid0_takeover_*. The error returned used by the second was wrong and an error would result in mddev->private being set to NULL and sooner or later a crash. So never return NULL, return ERR_PTR(err), not NULL from create_stripe_zones. This bug has been present since 2.6.35 so the fix is suitable for any kernel since then. 
Cc: stable@vger.kernel.org Signed-off-by: NeilBrown --- drivers/md/raid0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 15c8d3505450..d9babda582b9 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -289,7 +289,7 @@ abort: kfree(conf->strip_zone); kfree(conf->devlist); kfree(conf); - *private_conf = NULL; + *private_conf = ERR_PTR(err); return err; } From f96c9f305c24a0d4a075e2c75aa6b417aa238687 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 21 Feb 2013 15:50:07 +1100 Subject: [PATCH 027/261] md/raid0: improve error message when converting RAID4-with-spares to RAID0 Mentioning "bad disk number -1" exposes irrelevant internal detail. Just say they are inactive and must be removed. Signed-off-by: NeilBrown --- drivers/md/raid0.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index d9babda582b9..0505452de8d6 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -175,7 +175,13 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) rdev1->new_raid_disk = j; } - if (j < 0 || j >= mddev->raid_disks) { + if (j < 0) { + printk(KERN_ERR + "md/raid0:%s: remove inactive devices before converting to RAID0\n", + mdname(mddev)); + goto abort; + } + if (j >= mddev->raid_disks) { printk(KERN_ERR "md/raid0:%s: bad disk number %d - " "aborting!\n", mdname(mddev), j); goto abort; From ee0b0244030434cdda26777bfb98962447e080cd Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 25 Feb 2013 12:38:29 +1100 Subject: [PATCH 028/261] md/raid1,raid10: fix deadlock with freeze_array() When raid1/raid10 needs to fix a read error, it first drains all pending requests by calling freeze_array(). This calls flush_pending_writes() if it needs to sleep, but some writes may be pending in a per-process plug rather than in the per-array request queue. When raid1{,0}_unplug() moves the request from the per-process plug to the per-array request queue (from which flush_pending_writes() can flush them), it needs to wake up freeze_array(), or freeze_array() will never flush them and so it will block forever. So add the requires wake_up() calls. This bug was introduced by commit f54a9d0e59c4bea3db733921ca9147612a6f292c for raid1 and a similar commit for RAID10, and so has been present since linux-3.6. As the bug causes a deadlock I believe this fix is suitable for -stable. 
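To illustrate the wait/wake pairing that the added wake_up() calls restore, here is a simplified, self-contained sketch; the names are generic and the state is reduced to a single counter, unlike the real raid1/raid10 code.

#include <linux/wait.h>
#include <linux/spinlock.h>

static DECLARE_WAIT_QUEUE_HEAD(example_wait);
static DEFINE_SPINLOCK(example_lock);
static int example_pending;	/* writes protected by example_lock */

/* "freeze" side: sleep until every pending request has been handed over. */
static void example_freeze(void)
{
	wait_event(example_wait, example_pending == 0);
}

/* "unplug" side: after moving requests to the shared queue, the waiter
 * must be woken, otherwise it never re-evaluates its condition. */
static void example_unplug(void)
{
	spin_lock_irq(&example_lock);
	example_pending = 0;		/* requests now visible to the flusher */
	spin_unlock_irq(&example_lock);
	wake_up(&example_wait);		/* the call the fix adds */
}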
Cc: stable@vger.kernel.org (3.6.y 3.7.y 3.8.y) Reported-by: Tregaron Bayly Tested-by: Tregaron Bayly Signed-off-by: NeilBrown --- drivers/md/raid1.c | 1 + drivers/md/raid10.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 6e5d5a5f9cb4..fd86b372692d 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -967,6 +967,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule) bio_list_merge(&conf->pending_bio_list, &plug->pending); conf->pending_count += plug->pending_cnt; spin_unlock_irq(&conf->device_lock); + wake_up(&conf->wait_barrier); md_wakeup_thread(mddev->thread); kfree(plug); return; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 61ed150bd0cf..77b562d18a90 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1119,6 +1119,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) bio_list_merge(&conf->pending_bio_list, &plug->pending); conf->pending_count += plug->pending_cnt; spin_unlock_irq(&conf->device_lock); + wake_up(&conf->wait_barrier); md_wakeup_thread(mddev->thread); kfree(plug); return; From 3a68f19d7afb80f548d016effbc6ed52643a8085 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 20 Dec 2012 18:48:20 +0000 Subject: [PATCH 029/261] sfc: Properly sync RX DMA buffer when it is not the last in the page We may currently allocate two RX DMA buffers to a page, and only unmap the page when the second is completed. We do not sync the first RX buffer to be completed; this can result in packet loss or corruption if the last RX buffer completed in a NAPI poll is the first in a page and is not DMA-coherent. (In the middle of a NAPI poll, we will handle the following RX completion and unmap the page *before* looking at the content of the first buffer.) 
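The underlying streaming-DMA rule can be sketched as follows; this is a generic illustration of dma_sync_single_for_cpu() usage, not the sfc code itself.

#include <linux/dma-mapping.h>
#include <linux/device.h>

/*
 * Before the CPU looks at a buffer that is still DMA-mapped (here, the
 * first half of a page whose second half is still owned by the NIC),
 * ownership of the used bytes must be handed back to the CPU.
 */
static void example_rx_complete(struct device *dev, dma_addr_t dma_addr,
				unsigned int used_len)
{
	dma_sync_single_for_cpu(dev, dma_addr, used_len, DMA_FROM_DEVICE);
	/* ...the packet contents may now be inspected safely... */
}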
Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/rx.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index d780a0d096b4..a5754916478e 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -236,7 +236,8 @@ static int efx_init_rx_buffers_page(struct efx_rx_queue *rx_queue) } static void efx_unmap_rx_buffer(struct efx_nic *efx, - struct efx_rx_buffer *rx_buf) + struct efx_rx_buffer *rx_buf, + unsigned int used_len) { if ((rx_buf->flags & EFX_RX_BUF_PAGE) && rx_buf->u.page) { struct efx_rx_page_state *state; @@ -247,6 +248,10 @@ static void efx_unmap_rx_buffer(struct efx_nic *efx, state->dma_addr, efx_rx_buf_size(efx), DMA_FROM_DEVICE); + } else if (used_len) { + dma_sync_single_for_cpu(&efx->pci_dev->dev, + rx_buf->dma_addr, used_len, + DMA_FROM_DEVICE); } } else if (!(rx_buf->flags & EFX_RX_BUF_PAGE) && rx_buf->u.skb) { dma_unmap_single(&efx->pci_dev->dev, rx_buf->dma_addr, @@ -269,7 +274,7 @@ static void efx_free_rx_buffer(struct efx_nic *efx, static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf) { - efx_unmap_rx_buffer(rx_queue->efx, rx_buf); + efx_unmap_rx_buffer(rx_queue->efx, rx_buf, 0); efx_free_rx_buffer(rx_queue->efx, rx_buf); } @@ -535,10 +540,10 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, goto out; } - /* Release card resources - assumes all RX buffers consumed in-order - * per RX queue + /* Release and/or sync DMA mapping - assumes all RX buffers + * consumed in-order per RX queue */ - efx_unmap_rx_buffer(efx, rx_buf); + efx_unmap_rx_buffer(efx, rx_buf, len); /* Prefetch nice and early so data will (hopefully) be in cache by * the time we look at it. From b590ace09d51cd39744e0f7662c5e4a0d1b5d952 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 10 Jan 2013 23:51:54 +0000 Subject: [PATCH 030/261] sfc: Fix efx_rx_buf_offset() in the presence of swiotlb We assume that the mapping between DMA and virtual addresses is done on whole pages, so we can find the page offset of an RX buffer using the lower bits of the DMA address. However, swiotlb maps in units of 2K, breaking this assumption. Add an explicit page_offset field to struct efx_rx_buffer. Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/net_driver.h | 4 +++- drivers/net/ethernet/sfc/rx.c | 10 +++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 2d756c1d7142..0a90abd2421b 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -210,6 +210,7 @@ struct efx_tx_queue { * Will be %NULL if the buffer slot is currently free. * @page: The associated page buffer. Valif iff @flags & %EFX_RX_BUF_PAGE. * Will be %NULL if the buffer slot is currently free. + * @page_offset: Offset within page. Valid iff @flags & %EFX_RX_BUF_PAGE. * @len: Buffer length, in bytes. * @flags: Flags for buffer and packet state. 
*/ @@ -219,7 +220,8 @@ struct efx_rx_buffer { struct sk_buff *skb; struct page *page; } u; - unsigned int len; + u16 page_offset; + u16 len; u16 flags; }; #define EFX_RX_BUF_PAGE 0x0001 diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index a5754916478e..879ff5849bbd 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -90,11 +90,7 @@ static unsigned int rx_refill_threshold; static inline unsigned int efx_rx_buf_offset(struct efx_nic *efx, struct efx_rx_buffer *buf) { - /* Offset is always within one page, so we don't need to consider - * the page order. - */ - return ((unsigned int) buf->dma_addr & (PAGE_SIZE - 1)) + - efx->type->rx_buffer_hash_size; + return buf->page_offset + efx->type->rx_buffer_hash_size; } static inline unsigned int efx_rx_buf_size(struct efx_nic *efx) { @@ -187,6 +183,7 @@ static int efx_init_rx_buffers_page(struct efx_rx_queue *rx_queue) struct efx_nic *efx = rx_queue->efx; struct efx_rx_buffer *rx_buf; struct page *page; + unsigned int page_offset; struct efx_rx_page_state *state; dma_addr_t dma_addr; unsigned index, count; @@ -211,12 +208,14 @@ static int efx_init_rx_buffers_page(struct efx_rx_queue *rx_queue) state->dma_addr = dma_addr; dma_addr += sizeof(struct efx_rx_page_state); + page_offset = sizeof(struct efx_rx_page_state); split: index = rx_queue->added_count & rx_queue->ptr_mask; rx_buf = efx_rx_buffer(rx_queue, index); rx_buf->dma_addr = dma_addr + EFX_PAGE_IP_ALIGN; rx_buf->u.page = page; + rx_buf->page_offset = page_offset; rx_buf->len = efx->rx_buffer_len - EFX_PAGE_IP_ALIGN; rx_buf->flags = EFX_RX_BUF_PAGE; ++rx_queue->added_count; @@ -227,6 +226,7 @@ static int efx_init_rx_buffers_page(struct efx_rx_queue *rx_queue) /* Use the second half of the page */ get_page(page); dma_addr += (PAGE_SIZE >> 1); + page_offset += (PAGE_SIZE >> 1); ++count; goto split; } From 29c69a4882641285a854d6d03ca5adbba68c0034 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 28 Jan 2013 19:01:06 +0000 Subject: [PATCH 031/261] sfc: Detach net device when stopping queues for reconfiguration We must only ever stop TX queues when they are full or the net device is not 'ready' so far as the net core, and specifically the watchdog, is concerned. Otherwise, the watchdog may fire *immediately* if no packets have been added to the queue in the last 5 seconds. The device is ready if all the following are true: (a) It has a qdisc (b) It is marked present (c) It is running (d) The link is reported up (a) and (c) are normally true, and must not be changed by a driver. (d) is under our control, but fake link changes may disturb userland. This leaves (b). We already mark the device absent during reset and self-test, but we need to do the same during MTU changes and ring reallocation. We don't need to do this when the device is brought down because then (c) is already false. 
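The pattern being applied can be sketched with the generic net core helpers; this illustrates the detach/attach sequence only, not the sfc-specific code (which uses a driver wrapper that also synchronizes with the IRQ path).

#include <linux/netdevice.h>

/* Mark the device absent so the TX watchdog ignores the stopped queues,
 * reconfigure, then mark it present again. */
static int example_reconfigure(struct net_device *ndev)
{
	netif_device_detach(ndev);	/* clears "present", condition (b) */
	netif_tx_disable(ndev);

	/* ...change the MTU or reallocate rings here... */

	netif_device_attach(ndev);	/* restarts queues when appropriate */
	return 0;
}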
Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/efx.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index bf57b3cb16ab..0bc00991d310 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -779,6 +779,7 @@ efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) tx_queue->txd.entries); } + efx_device_detach_sync(efx); efx_stop_all(efx); efx_stop_interrupts(efx, true); @@ -832,6 +833,7 @@ out: efx_start_interrupts(efx, true); efx_start_all(efx); + netif_device_attach(efx->net_dev); return rc; rollback: @@ -1641,8 +1643,12 @@ static void efx_stop_all(struct efx_nic *efx) /* Flush efx_mac_work(), refill_workqueue, monitor_work */ efx_flush_all(efx); - /* Stop the kernel transmit interface late, so the watchdog - * timer isn't ticking over the flush */ + /* Stop the kernel transmit interface. This is only valid if + * the device is stopped or detached; otherwise the watchdog + * may fire immediately. + */ + WARN_ON(netif_running(efx->net_dev) && + netif_device_present(efx->net_dev)); netif_tx_disable(efx->net_dev); efx_stop_datapath(efx); @@ -1963,16 +1969,18 @@ static int efx_change_mtu(struct net_device *net_dev, int new_mtu) if (new_mtu > EFX_MAX_MTU) return -EINVAL; - efx_stop_all(efx); - netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu); + efx_device_detach_sync(efx); + efx_stop_all(efx); + mutex_lock(&efx->mac_lock); net_dev->mtu = new_mtu; efx->type->reconfigure_mac(efx); mutex_unlock(&efx->mac_lock); efx_start_all(efx); + netif_device_attach(efx->net_dev); return 0; } From a7679ed5a0e92c87eeef33ae463e39a843561836 Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Mon, 25 Feb 2013 14:58:05 -0600 Subject: [PATCH 032/261] mac80211: Ensure off-channel frames don't get queued Commit 6c17b77b67587b9f9e3070fb89fe98cef3187131 (mac80211: Fix tx queue handling during scans) contains a bug that causes off-channel frames to get queued when they should be handed down to the driver for transmit. Prevent this from happening. Reported-by: Fabio Rossi Signed-off-by: Seth Forshee Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 56 ++++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index bb05a0f86034..c592a413bad9 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1231,34 +1231,40 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local, if (local->queue_stop_reasons[q] || (!txpending && !skb_queue_empty(&local->pending[q]))) { if (unlikely(info->flags & - IEEE80211_TX_INTFL_OFFCHAN_TX_OK && - local->queue_stop_reasons[q] & - ~BIT(IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL))) { + IEEE80211_TX_INTFL_OFFCHAN_TX_OK)) { + if (local->queue_stop_reasons[q] & + ~BIT(IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL)) { + /* + * Drop off-channel frames if queues + * are stopped for any reason other + * than off-channel operation. Never + * queue them. + */ + spin_unlock_irqrestore( + &local->queue_stop_reason_lock, + flags); + ieee80211_purge_tx_queue(&local->hw, + skbs); + return true; + } + } else { + /* - * Drop off-channel frames if queues are stopped - * for any reason other than off-channel - * operation. Never queue them. + * Since queue is stopped, queue up frames for + * later transmission from the tx-pending + * tasklet when the queue is woken again. 
*/ - spin_unlock_irqrestore( - &local->queue_stop_reason_lock, flags); - ieee80211_purge_tx_queue(&local->hw, skbs); - return true; + if (txpending) + skb_queue_splice_init(skbs, + &local->pending[q]); + else + skb_queue_splice_tail_init(skbs, + &local->pending[q]); + + spin_unlock_irqrestore(&local->queue_stop_reason_lock, + flags); + return false; } - - /* - * Since queue is stopped, queue up frames for later - * transmission from the tx-pending tasklet when the - * queue is woken again. - */ - if (txpending) - skb_queue_splice_init(skbs, &local->pending[q]); - else - skb_queue_splice_tail_init(skbs, - &local->pending[q]); - - spin_unlock_irqrestore(&local->queue_stop_reason_lock, - flags); - return false; } spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); From b759f4ddcafb6414056cf3e49f2ab12359101c2e Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 23 Feb 2013 18:40:57 +0100 Subject: [PATCH 033/261] mac80211: fix idle handling in monitor mode When the driver does not want a monitor mode VIF, no channel context is allocated for it. This causes ieee80211_recalc_idle to put the hardware into idle mode if only a monitor mode is active, breaking injection. Fix this by checking local->monitors in addition to active channel contexts. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 2c059e54e885..640afab304d7 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -107,7 +107,7 @@ void ieee80211_recalc_idle(struct ieee80211_local *local) lockdep_assert_held(&local->mtx); - active = !list_empty(&local->chanctx_list); + active = !list_empty(&local->chanctx_list) || local->monitors; if (!local->ops->remain_on_channel) { list_for_each_entry(roc, &local->roc_list, list) { From cb601ffa326bc5c74a6ecd8e72ae9631e5f12f75 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 23 Feb 2013 19:02:14 +0100 Subject: [PATCH 034/261] mac80211: fix monitor mode channel reporting When not using channel contexts with only monitor mode interfaces being active, report local->monitor_chandef to userspace. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 09d96a8f6c2c..808f5fcd1ced 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3285,13 +3285,19 @@ static int ieee80211_cfg_get_channel(struct wiphy *wiphy, struct cfg80211_chan_def *chandef) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_chanctx_conf *chanctx_conf; int ret = -ENODATA; rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - if (chanctx_conf) { - *chandef = chanctx_conf->def; + if (local->use_chanctx) { + chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + if (chanctx_conf) { + *chandef = chanctx_conf->def; + ret = 0; + } + } else if (local->open_count == local->monitors) { + *chandef = local->monitor_chandef; ret = 0; } rcu_read_unlock(); From 8a964f44e01ad3bbc208c3e80d931ba91b9ea786 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 25 Feb 2013 16:01:34 +0100 Subject: [PATCH 035/261] iwlwifi: always copy first 16 bytes of commands The FH hardware will always write back to the scratch field in commands, even host commands not just TX commands, which can overwrite parts of the command. 
This is problematic if the command is re-used (with IWL_HCMD_DFL_NOCOPY) and can cause calibration issues. Address this problem by always putting at least the first 16 bytes into the buffer we also use for the command header and therefore make the DMA engine write back into this. For commands that are smaller than 16 bytes also always map enough memory for the DMA engine to write back to. Cc: stable@vger.kernel.org Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- drivers/net/wireless/iwlwifi/iwl-devtrace.h | 10 ++- drivers/net/wireless/iwlwifi/pcie/internal.h | 9 +++ drivers/net/wireless/iwlwifi/pcie/tx.c | 75 +++++++++++++++----- 3 files changed, 71 insertions(+), 23 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-devtrace.h b/drivers/net/wireless/iwlwifi/iwl-devtrace.h index 9a0f45ec9e01..10f01793d7a6 100644 --- a/drivers/net/wireless/iwlwifi/iwl-devtrace.h +++ b/drivers/net/wireless/iwlwifi/iwl-devtrace.h @@ -349,25 +349,23 @@ TRACE_EVENT(iwlwifi_dev_rx_data, TRACE_EVENT(iwlwifi_dev_hcmd, TP_PROTO(const struct device *dev, struct iwl_host_cmd *cmd, u16 total_size, - const void *hdr, size_t hdr_len), - TP_ARGS(dev, cmd, total_size, hdr, hdr_len), + struct iwl_cmd_header *hdr), + TP_ARGS(dev, cmd, total_size, hdr), TP_STRUCT__entry( DEV_ENTRY __dynamic_array(u8, hcmd, total_size) __field(u32, flags) ), TP_fast_assign( - int i, offset = hdr_len; + int i, offset = sizeof(*hdr); DEV_ASSIGN; __entry->flags = cmd->flags; - memcpy(__get_dynamic_array(hcmd), hdr, hdr_len); + memcpy(__get_dynamic_array(hcmd), hdr, sizeof(*hdr)); for (i = 0; i < IWL_MAX_CMD_TFDS; i++) { if (!cmd->len[i]) continue; - if (!(cmd->dataflags[i] & IWL_HCMD_DFL_NOCOPY)) - continue; memcpy((u8 *)__get_dynamic_array(hcmd) + offset, cmd->data[i], cmd->len[i]); offset += cmd->len[i]; diff --git a/drivers/net/wireless/iwlwifi/pcie/internal.h b/drivers/net/wireless/iwlwifi/pcie/internal.h index aa2a39a637dd..3d62e8055352 100644 --- a/drivers/net/wireless/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/iwlwifi/pcie/internal.h @@ -182,6 +182,15 @@ struct iwl_queue { #define TFD_TX_CMD_SLOTS 256 #define TFD_CMD_SLOTS 32 +/* + * The FH will write back to the first TB only, so we need + * to copy some data into the buffer regardless of whether + * it should be mapped or not. This indicates how much to + * copy, even for HCMDs it must be big enough to fit the + * DRAM scratch from the TX cmd, at least 16 bytes. 
+ */ +#define IWL_HCMD_MIN_COPY_SIZE 16 + struct iwl_pcie_txq_entry { struct iwl_device_cmd *cmd; struct iwl_device_cmd *copy_cmd; diff --git a/drivers/net/wireless/iwlwifi/pcie/tx.c b/drivers/net/wireless/iwlwifi/pcie/tx.c index 8e9e3212fe78..8b625a7f5685 100644 --- a/drivers/net/wireless/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/iwlwifi/pcie/tx.c @@ -1152,10 +1152,12 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, void *dup_buf = NULL; dma_addr_t phys_addr; int idx; - u16 copy_size, cmd_size; + u16 copy_size, cmd_size, dma_size; bool had_nocopy = false; int i; u32 cmd_pos; + const u8 *cmddata[IWL_MAX_CMD_TFDS]; + u16 cmdlen[IWL_MAX_CMD_TFDS]; copy_size = sizeof(out_cmd->hdr); cmd_size = sizeof(out_cmd->hdr); @@ -1164,8 +1166,23 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, BUILD_BUG_ON(IWL_MAX_CMD_TFDS > IWL_NUM_OF_TBS - 1); for (i = 0; i < IWL_MAX_CMD_TFDS; i++) { + cmddata[i] = cmd->data[i]; + cmdlen[i] = cmd->len[i]; + if (!cmd->len[i]) continue; + + /* need at least IWL_HCMD_MIN_COPY_SIZE copied */ + if (copy_size < IWL_HCMD_MIN_COPY_SIZE) { + int copy = IWL_HCMD_MIN_COPY_SIZE - copy_size; + + if (copy > cmdlen[i]) + copy = cmdlen[i]; + cmdlen[i] -= copy; + cmddata[i] += copy; + copy_size += copy; + } + if (cmd->dataflags[i] & IWL_HCMD_DFL_NOCOPY) { had_nocopy = true; if (WARN_ON(cmd->dataflags[i] & IWL_HCMD_DFL_DUP)) { @@ -1185,7 +1202,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, goto free_dup_buf; } - dup_buf = kmemdup(cmd->data[i], cmd->len[i], + dup_buf = kmemdup(cmddata[i], cmdlen[i], GFP_ATOMIC); if (!dup_buf) return -ENOMEM; @@ -1195,7 +1212,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, idx = -EINVAL; goto free_dup_buf; } - copy_size += cmd->len[i]; + copy_size += cmdlen[i]; } cmd_size += cmd->len[i]; } @@ -1242,14 +1259,31 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, /* and copy the data that needs to be copied */ cmd_pos = offsetof(struct iwl_device_cmd, payload); + copy_size = sizeof(out_cmd->hdr); for (i = 0; i < IWL_MAX_CMD_TFDS; i++) { - if (!cmd->len[i]) + int copy = 0; + + if (!cmd->len) continue; - if (cmd->dataflags[i] & (IWL_HCMD_DFL_NOCOPY | - IWL_HCMD_DFL_DUP)) - break; - memcpy((u8 *)out_cmd + cmd_pos, cmd->data[i], cmd->len[i]); - cmd_pos += cmd->len[i]; + + /* need at least IWL_HCMD_MIN_COPY_SIZE copied */ + if (copy_size < IWL_HCMD_MIN_COPY_SIZE) { + copy = IWL_HCMD_MIN_COPY_SIZE - copy_size; + + if (copy > cmd->len[i]) + copy = cmd->len[i]; + } + + /* copy everything if not nocopy/dup */ + if (!(cmd->dataflags[i] & (IWL_HCMD_DFL_NOCOPY | + IWL_HCMD_DFL_DUP))) + copy = cmd->len[i]; + + if (copy) { + memcpy((u8 *)out_cmd + cmd_pos, cmd->data[i], copy); + cmd_pos += copy; + copy_size += copy; + } } WARN_ON_ONCE(txq->entries[idx].copy_cmd); @@ -1275,7 +1309,14 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, out_cmd->hdr.cmd, le16_to_cpu(out_cmd->hdr.sequence), cmd_size, q->write_ptr, idx, trans_pcie->cmd_queue); - phys_addr = dma_map_single(trans->dev, &out_cmd->hdr, copy_size, + /* + * If the entire command is smaller than IWL_HCMD_MIN_COPY_SIZE, we must + * still map at least that many bytes for the hardware to write back to. + * We have enough space, so that's not a problem. 
+ */ + dma_size = max_t(u16, copy_size, IWL_HCMD_MIN_COPY_SIZE); + + phys_addr = dma_map_single(trans->dev, &out_cmd->hdr, dma_size, DMA_BIDIRECTIONAL); if (unlikely(dma_mapping_error(trans->dev, phys_addr))) { idx = -ENOMEM; @@ -1283,14 +1324,15 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, } dma_unmap_addr_set(out_meta, mapping, phys_addr); - dma_unmap_len_set(out_meta, len, copy_size); + dma_unmap_len_set(out_meta, len, dma_size); iwl_pcie_txq_build_tfd(trans, txq, phys_addr, copy_size, 1); + /* map the remaining (adjusted) nocopy/dup fragments */ for (i = 0; i < IWL_MAX_CMD_TFDS; i++) { - const void *data = cmd->data[i]; + const void *data = cmddata[i]; - if (!cmd->len[i]) + if (!cmdlen[i]) continue; if (!(cmd->dataflags[i] & (IWL_HCMD_DFL_NOCOPY | IWL_HCMD_DFL_DUP))) @@ -1298,7 +1340,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, if (cmd->dataflags[i] & IWL_HCMD_DFL_DUP) data = dup_buf; phys_addr = dma_map_single(trans->dev, (void *)data, - cmd->len[i], DMA_BIDIRECTIONAL); + cmdlen[i], DMA_BIDIRECTIONAL); if (dma_mapping_error(trans->dev, phys_addr)) { iwl_pcie_tfd_unmap(trans, out_meta, &txq->tfds[q->write_ptr], @@ -1307,7 +1349,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, goto out; } - iwl_pcie_txq_build_tfd(trans, txq, phys_addr, cmd->len[i], 0); + iwl_pcie_txq_build_tfd(trans, txq, phys_addr, cmdlen[i], 0); } out_meta->flags = cmd->flags; @@ -1317,8 +1359,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, txq->need_update = 1; - trace_iwlwifi_dev_hcmd(trans->dev, cmd, cmd_size, - &out_cmd->hdr, copy_size); + trace_iwlwifi_dev_hcmd(trans->dev, cmd, cmd_size, &out_cmd->hdr); /* start timer if queue currently empty */ if (q->read_ptr == q->write_ptr && trans_pcie->wd_timeout) From 38a12b5b029dec6e6dad1b5d3269b4c356c44c0e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 22 Feb 2013 14:07:56 +0100 Subject: [PATCH 036/261] iwlwifi: mvm: fix AP/GO mode station removal When stations are removed while packets are in the queue, we drain the queues first, and then remove the stations. If this happens in AP mode while the interface is removed the MAC context might be removed from the firmware before we removed the station(s), resulting in a SYSASSERT 3421. This is because we remove the MAC context from the FW in stop_ap(), but only flush the station drain work later in remove_interface(). Refactor the code a bit to have a common MAC context removal preparation first to solve this. 
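The shape of the refactoring can be sketched as a skeleton; every name below is a generic placeholder for the idea that both teardown paths must drain stations before the MAC context disappears.

struct example_ctx;

static void example_drain_stations(struct example_ctx *ctx);
static void example_remove_mac_context(struct example_ctx *ctx);
static void example_release_resources(struct example_ctx *ctx);

/* Common preparation: flush pending station removal work first. */
static void example_prepare_mac_removal(struct example_ctx *ctx)
{
	example_drain_stations(ctx);
}

static void example_stop_ap(struct example_ctx *ctx)
{
	example_prepare_mac_removal(ctx);
	example_remove_mac_context(ctx);	/* now safe: no stations left */
}

static void example_remove_interface(struct example_ctx *ctx)
{
	example_prepare_mac_removal(ctx);
	example_release_resources(ctx);
}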
Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index e8264e11b12d..7e169b085afe 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -557,11 +557,9 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw, return ret; } -static void iwl_mvm_mac_remove_interface(struct ieee80211_hw *hw, - struct ieee80211_vif *vif) +static void iwl_mvm_prepare_mac_removal(struct iwl_mvm *mvm, + struct ieee80211_vif *vif) { - struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); - struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); u32 tfd_msk = 0, ac; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) @@ -594,12 +592,21 @@ static void iwl_mvm_mac_remove_interface(struct ieee80211_hw *hw, */ flush_work(&mvm->sta_drained_wk); } +} + +static void iwl_mvm_mac_remove_interface(struct ieee80211_hw *hw, + struct ieee80211_vif *vif) +{ + struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + + iwl_mvm_prepare_mac_removal(mvm, vif); mutex_lock(&mvm->mutex); /* * For AP/GO interface, the tear down of the resources allocated to the - * interface should be handled as part of the bss_info_changed flow. + * interface is be handled as part of the stop_ap flow. */ if (vif->type == NL80211_IFTYPE_AP) { iwl_mvm_dealloc_int_sta(mvm, &mvmvif->bcast_sta); @@ -763,6 +770,8 @@ static void iwl_mvm_stop_ap(struct ieee80211_hw *hw, struct ieee80211_vif *vif) struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + iwl_mvm_prepare_mac_removal(mvm, vif); + mutex_lock(&mvm->mutex); mvmvif->ap_active = false; From 46c498c2cdee5efe44f617bcd4f388179be36115 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 26 Feb 2013 18:44:33 +0100 Subject: [PATCH 037/261] stop_machine: Mark per cpu stopper enabled early commit 14e568e78 (stop_machine: Use smpboot threads) introduced the following regression: Before this commit the stopper enabled bit was set in the online notifier. CPU0 CPU1 cpu_up cpu online hotplug_notifier(ONLINE) stopper(CPU1)->enabled = true; ... stop_machine() The conversion to smpboot threads moved the enablement to the wakeup path of the parked thread. The majority of users seem to have the following working order: CPU0 CPU1 cpu_up cpu online unpark_threads() wakeup(stopper[CPU1]) .... stopper thread runs stopper(CPU1)->enabled = true; stop_machine() But Konrad and Sander have observed: CPU0 CPU1 cpu_up cpu online unpark_threads() wakeup(stopper[CPU1]) .... stop_machine() stopper thread runs stopper(CPU1)->enabled = true; Now the stop machinery kicks CPU0 into the stop loop, where it gets stuck forever because the queue code saw stopper(CPU1)->enabled == false, so CPU0 waits for CPU1 to enter stomp_machine, but the CPU1 stopper work got discarded due to enabled == false. Add a pre_unpark function to the smpboot thread descriptor and call it before waking the thread. This fixes the problem at hand, but the stop_machine code should be more robust. The stopper->enabled flag smells fishy at best. Thanks to Konrad for going through a loop of debug patches and providing the information to decode this issue. Reported-and-tested-by: Konrad Rzeszutek Wilk Reported-and-tested-by: Sander Eikelenboom Cc: Srivatsa S. 
Bhat Cc: Rusty Russell Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1302261843240.22263@ionos Signed-off-by: Thomas Gleixner --- include/linux/smpboot.h | 4 ++++ kernel/smpboot.c | 2 ++ kernel/stop_machine.c | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h index c65dee059913..13e929679550 100644 --- a/include/linux/smpboot.h +++ b/include/linux/smpboot.h @@ -24,6 +24,9 @@ struct smpboot_thread_data; * parked (cpu offline) * @unpark: Optional unpark function, called when the thread is * unparked (cpu online) + * @pre_unpark: Optional unpark function, called before the thread is + * unparked (cpu online). This is not guaranteed to be + * called on the target cpu of the thread. Careful! * @selfparking: Thread is not parked by the park function. * @thread_comm: The base name of the thread */ @@ -37,6 +40,7 @@ struct smp_hotplug_thread { void (*cleanup)(unsigned int cpu, bool online); void (*park)(unsigned int cpu); void (*unpark)(unsigned int cpu); + void (*pre_unpark)(unsigned int cpu); bool selfparking; const char *thread_comm; }; diff --git a/kernel/smpboot.c b/kernel/smpboot.c index d4abac261779..8eaed9aa9cf0 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -209,6 +209,8 @@ static void smpboot_unpark_thread(struct smp_hotplug_thread *ht, unsigned int cp { struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); + if (ht->pre_unpark) + ht->pre_unpark(cpu); kthread_unpark(tsk); } diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 95d178c62d5a..c09f2955ae30 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -336,7 +336,7 @@ static struct smp_hotplug_thread cpu_stop_threads = { .create = cpu_stop_create, .setup = cpu_stop_unpark, .park = cpu_stop_park, - .unpark = cpu_stop_unpark, + .pre_unpark = cpu_stop_unpark, .selfparking = true, }; From 0237c11044b3670adcbe80cd6dd721285347f497 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Tue, 26 Feb 2013 04:06:20 +0000 Subject: [PATCH 038/261] drivers: net: ethernet: cpsw: consider number of slaves in interation Make cpsw_add_default_vlan() look at the actual number of slaves for its iteration, so boards with less than 2 slaves don't ooops at boot. Signed-off-by: Daniel Mack Cc: Mugunthan V N Cc: David S. Miller Acked-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 7e93df6585e7..01ffbc486982 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -731,7 +731,7 @@ static inline void cpsw_add_default_vlan(struct cpsw_priv *priv) writel(vlan, &priv->host_port_regs->port_vlan); - for (i = 0; i < 2; i++) + for (i = 0; i < priv->data.slaves; i++) slave_write(priv->slaves + i, vlan, reg); cpsw_ale_add_vlan(priv->ale, vlan, ALE_ALL_PORTS << port, From 6c8c4e4c24b9f6cee3d356a51e4a7f2af787a49b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 25 Feb 2013 22:26:15 +0000 Subject: [PATCH 039/261] bond: check if slave count is 0 in case when deciding to take slave's mac in bond_enslave(), check slave_cnt before actually using slave address. introduced by: commit 409cc1f8a41 (bond: have random dev address by default instead of zeroes) Reported-by: Greg Rose Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 11d01d67b3f5..7bd068a6056a 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1629,7 +1629,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) /* If this is the first slave, then we need to set the master's hardware * address to be the same as the slave's. */ - if (bond->dev_addr_from_first) + if (bond->slave_cnt == 0 && bond->dev_addr_from_first) bond_set_dev_addr(bond->dev, slave_dev); new_slave = kzalloc(sizeof(struct slave), GFP_KERNEL); From 114a6f8b52163c0232fbcd7f3808ff04dc61a9b5 Mon Sep 17 00:00:00 2001 From: Marina Makienko Date: Mon, 25 Feb 2013 22:26:50 +0000 Subject: [PATCH 040/261] isdn: hisax: add missing usb_free_urb Add missing usb_free_urb() on failure path in st5481_setup_usb(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Marina Makienko Signed-off-by: David S. Miller --- drivers/isdn/hisax/st5481_usb.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/isdn/hisax/st5481_usb.c b/drivers/isdn/hisax/st5481_usb.c index 017c67ea3f4c..ead0a4fb7448 100644 --- a/drivers/isdn/hisax/st5481_usb.c +++ b/drivers/isdn/hisax/st5481_usb.c @@ -294,13 +294,13 @@ int st5481_setup_usb(struct st5481_adapter *adapter) // Allocate URBs and buffers for interrupt endpoint urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { - return -ENOMEM; + goto err1; } intr->urb = urb; buf = kmalloc(INT_PKT_SIZE, GFP_KERNEL); if (!buf) { - return -ENOMEM; + goto err2; } endpoint = &altsetting->endpoint[EP_INT-1]; @@ -313,6 +313,14 @@ int st5481_setup_usb(struct st5481_adapter *adapter) endpoint->desc.bInterval); return 0; +err2: + usb_free_urb(intr->urb); + intr->urb = NULL; +err1: + usb_free_urb(ctrl->urb); + ctrl->urb = NULL; + + return -ENOMEM; } /* From f444eb10d537c776ce82fbcd07733d75f45346f1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Feb 2013 12:04:18 +0100 Subject: [PATCH 041/261] iwlwifi: fix wakeup status query and packet reporting The wakeup packet in the status response is padded out to a multiple of 4 bytes by the firmware for transfer to the host, take that into account when checking the length of the command. Also, the reported wakeup packet includes the FCS but the userspace API doesn't, so remove that. If it is a data packet it is reported as an 802.3 packet but I forgot to take into account and remove the encryption head/tail, fix all of that as well. 
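The padding-aware length check can be expressed in isolation like this; it is a generic sketch using ALIGN() from <linux/kernel.h>, with illustrative parameter names rather than the driver's own structures.

#include <linux/kernel.h>
#include <linux/types.h>

/*
 * The firmware pads the attached wake packet out to a multiple of
 * 4 bytes, so the response length must be checked against the
 * rounded-up buffer size, not the raw one.
 */
static bool example_status_len_ok(size_t resp_len, size_t fixed_len,
				  u32 wake_packet_bufsize)
{
	return resp_len == fixed_len + ALIGN(wake_packet_bufsize, 4);
}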
Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- drivers/net/wireless/iwlwifi/mvm/d3.c | 104 ++++++++++++++++++------- drivers/net/wireless/iwlwifi/mvm/mvm.h | 4 + 2 files changed, 81 insertions(+), 27 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/d3.c b/drivers/net/wireless/iwlwifi/mvm/d3.c index c64d864799cd..994c8c263dc0 100644 --- a/drivers/net/wireless/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/iwlwifi/mvm/d3.c @@ -61,6 +61,7 @@ * *****************************************************************************/ +#include #include #include #include "iwl-modparams.h" @@ -192,6 +193,11 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw, sizeof(wkc), &wkc); data->error = ret != 0; + mvm->ptk_ivlen = key->iv_len; + mvm->ptk_icvlen = key->icv_len; + mvm->gtk_ivlen = key->iv_len; + mvm->gtk_icvlen = key->icv_len; + /* don't upload key again */ goto out_unlock; } @@ -304,9 +310,13 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw, */ if (key->flags & IEEE80211_KEY_FLAG_PAIRWISE) { key->hw_key_idx = 0; + mvm->ptk_ivlen = key->iv_len; + mvm->ptk_icvlen = key->icv_len; } else { data->gtk_key_idx++; key->hw_key_idx = data->gtk_key_idx; + mvm->gtk_ivlen = key->iv_len; + mvm->gtk_icvlen = key->icv_len; } ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, true); @@ -649,6 +659,11 @@ int iwl_mvm_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) /* We reprogram keys and shouldn't allocate new key indices */ memset(mvm->fw_key_table, 0, sizeof(mvm->fw_key_table)); + mvm->ptk_ivlen = 0; + mvm->ptk_icvlen = 0; + mvm->ptk_ivlen = 0; + mvm->ptk_icvlen = 0; + /* * The D3 firmware still hardcodes the AP station ID for the * BSS we're associated with as 0. As a result, we have to move @@ -783,7 +798,6 @@ static void iwl_mvm_query_wakeup_reasons(struct iwl_mvm *mvm, struct iwl_wowlan_status *status; u32 reasons; int ret, len; - bool pkt8023 = false; struct sk_buff *pkt = NULL; iwl_trans_read_mem_bytes(mvm->trans, base, @@ -824,7 +838,8 @@ static void iwl_mvm_query_wakeup_reasons(struct iwl_mvm *mvm, status = (void *)cmd.resp_pkt->data; if (len - sizeof(struct iwl_cmd_header) != - sizeof(*status) + le32_to_cpu(status->wake_packet_bufsize)) { + sizeof(*status) + + ALIGN(le32_to_cpu(status->wake_packet_bufsize), 4)) { IWL_ERR(mvm, "Invalid WoWLAN status response!\n"); goto out; } @@ -836,61 +851,96 @@ static void iwl_mvm_query_wakeup_reasons(struct iwl_mvm *mvm, goto report; } - if (reasons & IWL_WOWLAN_WAKEUP_BY_MAGIC_PACKET) { + if (reasons & IWL_WOWLAN_WAKEUP_BY_MAGIC_PACKET) wakeup.magic_pkt = true; - pkt8023 = true; - } - if (reasons & IWL_WOWLAN_WAKEUP_BY_PATTERN) { + if (reasons & IWL_WOWLAN_WAKEUP_BY_PATTERN) wakeup.pattern_idx = le16_to_cpu(status->pattern_number); - pkt8023 = true; - } if (reasons & (IWL_WOWLAN_WAKEUP_BY_DISCONNECTION_ON_MISSED_BEACON | IWL_WOWLAN_WAKEUP_BY_DISCONNECTION_ON_DEAUTH)) wakeup.disconnect = true; - if (reasons & IWL_WOWLAN_WAKEUP_BY_GTK_REKEY_FAILURE) { + if (reasons & IWL_WOWLAN_WAKEUP_BY_GTK_REKEY_FAILURE) wakeup.gtk_rekey_failure = true; - pkt8023 = true; - } - if (reasons & IWL_WOWLAN_WAKEUP_BY_RFKILL_DEASSERTED) { + if (reasons & IWL_WOWLAN_WAKEUP_BY_RFKILL_DEASSERTED) wakeup.rfkill_release = true; - pkt8023 = true; - } - if (reasons & IWL_WOWLAN_WAKEUP_BY_EAPOL_REQUEST) { + if (reasons & IWL_WOWLAN_WAKEUP_BY_EAPOL_REQUEST) wakeup.eap_identity_req = true; - pkt8023 = true; - } - if (reasons & IWL_WOWLAN_WAKEUP_BY_FOUR_WAY_HANDSHAKE) { + if (reasons & IWL_WOWLAN_WAKEUP_BY_FOUR_WAY_HANDSHAKE) 
wakeup.four_way_handshake = true; - pkt8023 = true; - } if (status->wake_packet_bufsize) { - u32 pktsize = le32_to_cpu(status->wake_packet_bufsize); - u32 pktlen = le32_to_cpu(status->wake_packet_length); + int pktsize = le32_to_cpu(status->wake_packet_bufsize); + int pktlen = le32_to_cpu(status->wake_packet_length); + const u8 *pktdata = status->wake_packet; + struct ieee80211_hdr *hdr = (void *)pktdata; + int truncated = pktlen - pktsize; + + /* this would be a firmware bug */ + if (WARN_ON_ONCE(truncated < 0)) + truncated = 0; + + if (ieee80211_is_data(hdr->frame_control)) { + int hdrlen = ieee80211_hdrlen(hdr->frame_control); + int ivlen = 0, icvlen = 4; /* also FCS */ - if (pkt8023) { pkt = alloc_skb(pktsize, GFP_KERNEL); if (!pkt) goto report; - memcpy(skb_put(pkt, pktsize), status->wake_packet, - pktsize); + + memcpy(skb_put(pkt, hdrlen), pktdata, hdrlen); + pktdata += hdrlen; + pktsize -= hdrlen; + + if (ieee80211_has_protected(hdr->frame_control)) { + if (is_multicast_ether_addr(hdr->addr1)) { + ivlen = mvm->gtk_ivlen; + icvlen += mvm->gtk_icvlen; + } else { + ivlen = mvm->ptk_ivlen; + icvlen += mvm->ptk_icvlen; + } + } + + /* if truncated, FCS/ICV is (partially) gone */ + if (truncated >= icvlen) { + icvlen = 0; + truncated -= icvlen; + } else { + icvlen -= truncated; + truncated = 0; + } + + pktsize -= ivlen + icvlen; + pktdata += ivlen; + + memcpy(skb_put(pkt, pktsize), pktdata, pktsize); + if (ieee80211_data_to_8023(pkt, vif->addr, vif->type)) goto report; wakeup.packet = pkt->data; wakeup.packet_present_len = pkt->len; - wakeup.packet_len = pkt->len - (pktlen - pktsize); + wakeup.packet_len = pkt->len - truncated; wakeup.packet_80211 = false; } else { + int fcslen = 4; + + if (truncated >= 4) { + truncated -= 4; + fcslen = 0; + } else { + fcslen -= truncated; + truncated = 0; + } + pktsize -= fcslen; wakeup.packet = status->wake_packet; wakeup.packet_present_len = pktsize; - wakeup.packet_len = pktlen; + wakeup.packet_len = pktlen - truncated; wakeup.packet_80211 = true; } } diff --git a/drivers/net/wireless/iwlwifi/mvm/mvm.h b/drivers/net/wireless/iwlwifi/mvm/mvm.h index 4e339ccfa800..537711b10478 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/iwlwifi/mvm/mvm.h @@ -327,6 +327,10 @@ struct iwl_mvm { struct led_classdev led; struct ieee80211_vif *p2p_device_vif; + +#ifdef CONFIG_PM_SLEEP + int gtk_ivlen, gtk_icvlen, ptk_ivlen, ptk_icvlen; +#endif }; /* Extract MVM priv from op_mode and _hw */ From e477598351a40769f5b46ccea78479a1aad6f161 Mon Sep 17 00:00:00 2001 From: Dor Shaish Date: Tue, 12 Feb 2013 09:49:00 +0200 Subject: [PATCH 042/261] iwlwifi: mvm: Remove testing of static PIC in PhyDB The PIC was supposed to be a small signature appended to the PhyDB data, but the signature isn't really static and thus attempting to check it just causes the warnings spuriously so remove them. 
Signed-off-by: Dor Shaish Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- drivers/net/wireless/iwlwifi/iwl-phy-db.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-phy-db.c b/drivers/net/wireless/iwlwifi/iwl-phy-db.c index 14fc8d39fc28..3392011a8768 100644 --- a/drivers/net/wireless/iwlwifi/iwl-phy-db.c +++ b/drivers/net/wireless/iwlwifi/iwl-phy-db.c @@ -136,12 +136,6 @@ struct iwl_calib_res_notif_phy_db { u8 data[]; } __packed; -#define IWL_PHY_DB_STATIC_PIC cpu_to_le32(0x21436587) -static inline void iwl_phy_db_test_pic(__le32 pic) -{ - WARN_ON(IWL_PHY_DB_STATIC_PIC != pic); -} - struct iwl_phy_db *iwl_phy_db_init(struct iwl_trans *trans) { struct iwl_phy_db *phy_db = kzalloc(sizeof(struct iwl_phy_db), @@ -260,11 +254,6 @@ int iwl_phy_db_set_section(struct iwl_phy_db *phy_db, struct iwl_rx_packet *pkt, (size - CHANNEL_NUM_SIZE) / phy_db->channel_num; } - /* Test PIC */ - if (type != IWL_PHY_DB_CFG) - iwl_phy_db_test_pic(*(((__le32 *)phy_db_notif->data) + - (size / sizeof(__le32)) - 1)); - IWL_DEBUG_INFO(phy_db->trans, "%s(%d): [PHYDB]SET: Type %d , Size: %d\n", __func__, __LINE__, type, size); @@ -372,11 +361,6 @@ int iwl_phy_db_get_section_data(struct iwl_phy_db *phy_db, *size = entry->size; } - /* Test PIC */ - if (type != IWL_PHY_DB_CFG) - iwl_phy_db_test_pic(*(((__le32 *)*data) + - (*size / sizeof(__le32)) - 1)); - IWL_DEBUG_INFO(phy_db->trans, "%s(%d): [PHYDB] GET: Type %d , Size: %d\n", __func__, __LINE__, type, *size); From e70ab977991964a5a7ad1182799451d067e62669 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 25 Feb 2013 21:32:25 +0000 Subject: [PATCH 043/261] proc connector: reject unprivileged listener bumps While PROC_CN_MCAST_LISTEN/IGNORE is entirely advisory, it was possible for an unprivileged user to turn off notifications for all listeners by sending PROC_CN_MCAST_IGNORE. Instead, require the same privileges as required for a multicast bind. Signed-off-by: Kees Cook Cc: Evgeniy Polyakov Cc: Matt Helsley Cc: stable@vger.kernel.org Acked-by: Evgeniy Polyakov Acked-by: Matt Helsley Signed-off-by: David S. Miller --- drivers/connector/cn_proc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index fce2000eec31..1110478dd0fd 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -313,6 +313,12 @@ static void cn_proc_mcast_ctl(struct cn_msg *msg, (task_active_pid_ns(current) != &init_pid_ns)) return; + /* Can only change if privileged. */ + if (!capable(CAP_NET_ADMIN)) { + err = EPERM; + goto out; + } + mc_op = (enum proc_cn_mcast_op *)msg->data; switch (*mc_op) { case PROC_CN_MCAST_LISTEN: @@ -325,6 +331,8 @@ static void cn_proc_mcast_ctl(struct cn_msg *msg, err = EINVAL; break; } + +out: cn_proc_ack(err, msg->seq, msg->ack); } From 90c7881ecee1f08e0a49172cf61371cf2509ee4a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 26 Feb 2013 19:15:02 +0000 Subject: [PATCH 044/261] irda: small read beyond end of array in debug code charset comes from skb->data. It's a number in the 0-255 range. If we have debugging turned on then this could cause a read beyond the end of the array. Signed-off-by: Dan Carpenter Signed-off-by: David S. 
Miller --- net/irda/iriap.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/irda/iriap.c b/net/irda/iriap.c index e71e85ba2bf1..29340a9a6fb9 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -495,8 +495,11 @@ static void iriap_getvaluebyclass_confirm(struct iriap_cb *self, /* case CS_ISO_8859_9: */ /* case CS_UNICODE: */ default: - IRDA_DEBUG(0, "%s(), charset %s, not supported\n", - __func__, ias_charset_types[charset]); + IRDA_DEBUG(0, "%s(), charset [%d] %s, not supported\n", + __func__, charset, + charset < ARRAY_SIZE(ias_charset_types) ? + ias_charset_types[charset] : + "(unknown)"); /* Aborting, close connection! */ iriap_disconnect_request(self); From e2593fcde1d906b26b81b38755749f7427f3439f Mon Sep 17 00:00:00 2001 From: Dmitry Kravkov Date: Wed, 27 Feb 2013 00:04:59 +0000 Subject: [PATCH 045/261] bnx2x: fix UDP checksum for 57710/57711. Since commit 86564c3f "bnx2x: Remove many sparse warnings" UDP csum offload is broken for 57710/57711. Fix return value. Signed-off-by: Dmitry Kravkov CC: Ariel Elior CC: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index ecac04a3687c..a923bc4d5a1f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -3142,7 +3142,7 @@ static inline __le16 bnx2x_csum_fix(unsigned char *t_header, u16 csum, s8 fix) tsum = ~csum_fold(csum_add((__force __wsum) csum, csum_partial(t_header, -fix, 0))); - return bswab16(csum); + return bswab16(tsum); } static inline u32 bnx2x_xmit_type(struct bnx2x *bp, struct sk_buff *skb) From 1f84eab4ad73bcb7d779cba65291fe62909e373f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 27 Feb 2013 03:08:58 +0000 Subject: [PATCH 046/261] net: cdc_ncm: tag Huawei devices (e.g. E5331) with FLAG_WWAN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tag all Huawei NCM devices as WWAN modems, as we don't know of any which are not. This is necessary for userspace clients to know that the device requires further setup on e.g. an AT-capable serial ports before connectivity is available. Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 4a8c25a22294..61b74a2b89ac 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1213,6 +1213,14 @@ static const struct usb_device_id cdc_devs[] = { .driver_info = (unsigned long) &wwan_info, }, + /* tag Huawei devices as wwan */ + { USB_VENDOR_AND_INTERFACE_INFO(0x12d1, + USB_CLASS_COMM, + USB_CDC_SUBCLASS_NCM, + USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&wwan_info, + }, + /* Huawei NCM devices disguised as vendor specific */ { USB_VENDOR_AND_INTERFACE_INFO(0x12d1, 0xff, 0x02, 0x16), .driver_info = (unsigned long)&wwan_info, From 45af3fb4a018ef84bf1c9f2dfbd887a41242e77f Mon Sep 17 00:00:00 2001 From: Glen Turner Date: Wed, 27 Feb 2013 04:32:36 +0000 Subject: [PATCH 047/261] usb/net/asix_devices: Add USBNET HG20F9 ethernet dongle This USB ethernet adapter was purchased in anodyne packaging from the computer store adjacent to linux.conf.au 2013 in Canberra (Australia). A web search shows other recent purchasers in Lancaster (UK) and Seattle (USA). 
Just like an emergent virus, our age of e-commerce and airmail allows underdocumented hardware to spread around the world instantly using the vector of ridiculously low prices. Paige Thompson, infected via eBay, discovered that the HG20F9 is a copy of the Asix 88772B; many viruses copy the RNA of other viruses. See Paige's work at . This patch uses her discovery to update the restructured Asix driver in the current kernel. Just as some viruses inhabit seemingly-healthy cells, the HG20F9 uses the Vendor ID 0x066b assigned to Linksys Inc. For the present there is no clash of Product ID 0x20f9. Signed-off-by: Glen Turner Signed-off-by: David S. Miller --- drivers/net/usb/asix_devices.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 2205dbc8d32f..709753469099 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -924,6 +924,29 @@ static const struct driver_info ax88178_info = { .tx_fixup = asix_tx_fixup, }; +/* + * USBLINK 20F9 "USB 2.0 LAN" USB ethernet adapter, typically found in + * no-name packaging. + * USB device strings are: + * 1: Manufacturer: USBLINK + * 2: Product: HG20F9 USB2.0 + * 3: Serial: 000003 + * Appears to be compatible with Asix 88772B. + */ +static const struct driver_info hg20f9_info = { + .description = "HG20F9 USB 2.0 Ethernet", + .bind = ax88772_bind, + .unbind = ax88772_unbind, + .status = asix_status, + .link_reset = ax88772_link_reset, + .reset = ax88772_reset, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR | + FLAG_MULTI_PACKET, + .rx_fixup = asix_rx_fixup_common, + .tx_fixup = asix_tx_fixup, + .data = FLAG_EEPROM_MAC, +}; + extern const struct driver_info ax88172a_info; static const struct usb_device_id products [] = { @@ -1063,6 +1086,14 @@ static const struct usb_device_id products [] = { /* ASIX 88172a demo board */ USB_DEVICE(0x0b95, 0x172a), .driver_info = (unsigned long) &ax88172a_info, +}, { + /* + * USBLINK HG20F9 "USB 2.0 LAN" + * Appears to have gazumped Linksys's manufacturer ID but + * doesn't (yet) conflict with any known Linksys product. + */ + USB_DEVICE(0x066b, 0x20f9), + .driver_info = (unsigned long) &hg20f9_info, }, { }, // END }; From a3d63cadbad97671d740a9698acc2c95d1ca6e79 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 22 Feb 2013 21:09:17 +0100 Subject: [PATCH 048/261] ath9k: fix RSSI dummy marker value RSSI is being stored internally as s8 in several places. The indication of an unset RSSI value, ATH_RSSI_DUMMY_MARKER, was supposed to have been set to 127, but ended up being set to 0x127 because of a code cleanup mistake. This could lead to invalid signal strength values in a few places. Signed-off-by: Felix Fietkau Cc: stable@vger.kernel.org Signed-off-by: John W. 
Linville --- drivers/net/wireless/ath/ath9k/common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/common.h b/drivers/net/wireless/ath/ath9k/common.h index 5f845beeb18b..050ca4a4850d 100644 --- a/drivers/net/wireless/ath/ath9k/common.h +++ b/drivers/net/wireless/ath/ath9k/common.h @@ -27,7 +27,7 @@ #define WME_MAX_BA WME_BA_BMP_SIZE #define ATH_TID_MAX_BUFS (2 * WME_MAX_BA) -#define ATH_RSSI_DUMMY_MARKER 0x127 +#define ATH_RSSI_DUMMY_MARKER 127 #define ATH_RSSI_LPF_LEN 10 #define RSSI_LPF_THRESHOLD -20 #define ATH_RSSI_EP_MULTIPLIER (1<<7) From 838f427955dcfd16858b0108ce29029da0d56a4e Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 22 Feb 2013 21:37:25 +0100 Subject: [PATCH 049/261] ath9k_htc: fix signal strength handling issues The ath9k commit 2ef167557c0a26c88162ecffb017bfcc51eb7b29 (ath9k: fix signal strength reporting issues) fixed an issue where the reported per-frame signal strength reported to mac80211 was being overwritten with an internal average. The same issue is also present in ath9k_htc. In addition to preventing the driver from overwriting the value, this commit also ensures that the internal average (which is used for ANI) only tracks beacons of the AP that we're connected to. Signed-off-by: Felix Fietkau Cc: stable@vger.kernel.org Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/htc.h | 1 + drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 18 +++++++++++------- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/htc.h b/drivers/net/wireless/ath/ath9k/htc.h index 96bfb18078fa..d3b099d7898b 100644 --- a/drivers/net/wireless/ath/ath9k/htc.h +++ b/drivers/net/wireless/ath/ath9k/htc.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index b6a5a08810b8..878862178e06 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -1067,15 +1067,19 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv, last_rssi = priv->rx.last_rssi; - if (likely(last_rssi != ATH_RSSI_DUMMY_MARKER)) - rxbuf->rxstatus.rs_rssi = ATH_EP_RND(last_rssi, - ATH_RSSI_EP_MULTIPLIER); + if (ieee80211_is_beacon(hdr->frame_control) && + !is_zero_ether_addr(common->curbssid) && + ether_addr_equal(hdr->addr3, common->curbssid)) { + s8 rssi = rxbuf->rxstatus.rs_rssi; - if (rxbuf->rxstatus.rs_rssi < 0) - rxbuf->rxstatus.rs_rssi = 0; + if (likely(last_rssi != ATH_RSSI_DUMMY_MARKER)) + rssi = ATH_EP_RND(last_rssi, ATH_RSSI_EP_MULTIPLIER); - if (ieee80211_is_beacon(fc)) - priv->ah->stats.avgbrssi = rxbuf->rxstatus.rs_rssi; + if (rssi < 0) + rssi = 0; + + priv->ah->stats.avgbrssi = rssi; + } rx_status->mactime = be64_to_cpu(rxbuf->rxstatus.rs_tstamp); rx_status->band = hw->conf.channel->band; From f45dd363bee071a62e32398a0ae4a0214b8029eb Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sat, 23 Feb 2013 23:30:22 +0100 Subject: [PATCH 050/261] bcma: init spin lock This spin lock was not initialized. Signed-off-by: Hauke Mehrtens Signed-off-by: John W. 
Linville --- drivers/bcma/driver_pci_host.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bcma/driver_pci_host.c b/drivers/bcma/driver_pci_host.c index 221f8bb76390..25de38719684 100644 --- a/drivers/bcma/driver_pci_host.c +++ b/drivers/bcma/driver_pci_host.c @@ -405,6 +405,8 @@ void __devinit bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc) return; } + spin_lock_init(&pc_host->cfgspace_lock); + pc->host_controller = pc_host; pc_host->pci_controller.io_resource = &pc_host->io_resource; pc_host->pci_controller.mem_resource = &pc_host->mem_resource; From 3412f2f086ea7531378fabe756bd4a1109994ae6 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 25 Feb 2013 20:51:07 +0100 Subject: [PATCH 051/261] ath9k_hw: improve reset reliability after errors On many different chips, important aspects of the MAC state are not fully cleared by a warm reset. This can show up as tx/rx hangs, those annoying "DMA failed to stop in 10 ms..." messages or other quirks. On AR933x, the chip can occasionally get stuck in a way that only a driver unload/reload or a reboot would bring it back to life. With this patch, a full reset is issued when bringing the chip out of FULL-SLEEP state (after idle), or if either Rx or Tx was not shut down properly. This makes the DMA related error messages disappear completely in my tests on AR933x, and the chip does not get stuck anymore. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/hw.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index 42cf3c7f1e25..7b485e4d104c 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -1463,7 +1463,9 @@ static bool ath9k_hw_chip_reset(struct ath_hw *ah, reset_type = ATH9K_RESET_POWER_ON; else reset_type = ATH9K_RESET_COLD; - } + } else if (ah->chip_fullsleep || REG_READ(ah, AR_Q_TXE) || + (REG_READ(ah, AR_CR) & AR_CR_RXE)) + reset_type = ATH9K_RESET_COLD; if (!ath9k_hw_set_reset_reg(ah, reset_type)) return false; From 3e7a4ff7c5b6423ddb644df9c41b8b6d2fb79d30 Mon Sep 17 00:00:00 2001 From: Avinash Patil Date: Mon, 25 Feb 2013 16:01:34 -0800 Subject: [PATCH 052/261] mwifiex: correct sleep delay counter Maximum delay for waking up card is 50 ms. Because of typo in counter, this delay goes to 500ms. This patch fixes the bug. Cc: # 3.2+ Signed-off-by: Avinash Patil Signed-off-by: Amitkumar Karwar Signed-off-by: Yogesh Ashok Powar Signed-off-by: Bing Zhao Signed-off-by: John W. Linville --- drivers/net/wireless/mwifiex/pcie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mwifiex/pcie.c b/drivers/net/wireless/mwifiex/pcie.c index 35c79722c361..5c395e2e6a2b 100644 --- a/drivers/net/wireless/mwifiex/pcie.c +++ b/drivers/net/wireless/mwifiex/pcie.c @@ -302,7 +302,7 @@ static int mwifiex_pm_wakeup_card(struct mwifiex_adapter *adapter) i++; usleep_range(10, 20); /* 50ms max wait */ - if (i == 50000) + if (i == 5000) break; } From 6ef9e2f6d12ce9e2120916804d2ddd46b954a70b Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 26 Feb 2013 16:09:55 +0100 Subject: [PATCH 053/261] rt2x00: error in configurations with mesh support disabled If CONFIG_MAC80211_MESH is not set, cfg80211 will now allow advertising interface combinations with NL80211_IFTYPE_MESH_POINT present. Add appropriate ifdefs to avoid running into errors. 
Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Acked-by: Gertjan van Wingerde Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2x00dev.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c index 1031db66474a..189744db65e0 100644 --- a/drivers/net/wireless/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/rt2x00/rt2x00dev.c @@ -1236,8 +1236,10 @@ static inline void rt2x00lib_set_if_combinations(struct rt2x00_dev *rt2x00dev) */ if_limit = &rt2x00dev->if_limits_ap; if_limit->max = rt2x00dev->ops->max_ap_intf; - if_limit->types = BIT(NL80211_IFTYPE_AP) | - BIT(NL80211_IFTYPE_MESH_POINT); + if_limit->types = BIT(NL80211_IFTYPE_AP); +#ifdef CONFIG_MAC80211_MESH + if_limit->types |= BIT(NL80211_IFTYPE_MESH_POINT); +#endif /* * Build up AP interface combinations structure. @@ -1309,7 +1311,9 @@ int rt2x00lib_probe_dev(struct rt2x00_dev *rt2x00dev) rt2x00dev->hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_ADHOC) | BIT(NL80211_IFTYPE_AP) | +#ifdef CONFIG_MAC80211_MESH BIT(NL80211_IFTYPE_MESH_POINT) | +#endif BIT(NL80211_IFTYPE_WDS); rt2x00dev->hw->wiphy->flags |= WIPHY_FLAG_IBSS_RSN; From 466026989f112e0546ca39ab00a759af82dbe83a Mon Sep 17 00:00:00 2001 From: Bing Zhao Date: Tue, 26 Feb 2013 12:58:35 -0800 Subject: [PATCH 054/261] libertas: fix crash for SD8688 For SD8688, FUNC_INIT command is queued before fw_ready flag is set. This causes the following crash as lbs_thread blocks any command if fw_ready is not set. [ 209.338953] [] (__schedule+0x610/0x764) from [] (__lbs_cmd+0xb8/0x130 [libertas]) [ 209.348340] [] (__lbs_cmd+0xb8/0x130 [libertas]) from [] (if_sdio_finish_power_on+0xec/0x1b0 [libertas_sdio]) [ 209.360136] [] (if_sdio_finish_power_on+0xec/0x1b0 [libertas_sdio]) from [] (if_sdio_power_on+0x18c/0x20c [libertas_sdio]) [ 209.373052] [] (if_sdio_power_on+0x18c/0x20c [libertas_sdio]) from [] (if_sdio_probe+0x200/0x31c [libertas_sdio]) [ 209.385316] [] (if_sdio_probe+0x200/0x31c [libertas_sdio]) from [] (sdio_bus_probe+0x94/0xfc [mmc_core]) [ 209.396748] [] (sdio_bus_probe+0x94/0xfc [mmc_core]) from [] (driver_probe_device+0x12c/0x348) [ 209.407214] [] (driver_probe_device+0x12c/0x348) from [] (__driver_attach+0x78/0x9c) [ 209.416798] [] (__driver_attach+0x78/0x9c) from [] (bus_for_each_dev+0x50/0x88) [ 209.425946] [] (bus_for_each_dev+0x50/0x88) from [] (bus_add_driver+0x108/0x268) [ 209.435180] [] (bus_add_driver+0x108/0x268) from [] (driver_register+0xa4/0x134) [ 209.444426] [] (driver_register+0xa4/0x134) from [] (if_sdio_init_module+0x1c/0x3c [libertas_sdio]) [ 209.455339] [] (if_sdio_init_module+0x1c/0x3c [libertas_sdio]) from [] (do_one_initcall+0x98/0x174) [ 209.466236] [] (do_one_initcall+0x98/0x174) from [] (load_module+0x1c5c/0x1f80) [ 209.475390] [] (load_module+0x1c5c/0x1f80) from [] (sys_init_module+0x104/0x128) [ 209.484632] [] (sys_init_module+0x104/0x128) from [] (ret_fast_syscall+0x0/0x38) Fix it by setting fw_ready flag prior to queuing FUNC_INIT command. Cc: # 3.5+ Reported-by: Lubomir Rintel Tested-by: Lubomir Rintel Signed-off-by: Bing Zhao Signed-off-by: John W. 
Linville --- drivers/net/wireless/libertas/if_sdio.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/libertas/if_sdio.c b/drivers/net/wireless/libertas/if_sdio.c index 739309e70d8b..45578335e420 100644 --- a/drivers/net/wireless/libertas/if_sdio.c +++ b/drivers/net/wireless/libertas/if_sdio.c @@ -825,6 +825,11 @@ static void if_sdio_finish_power_on(struct if_sdio_card *card) sdio_release_host(func); + /* Set fw_ready before queuing any commands so that + * lbs_thread won't block from sending them to firmware. + */ + priv->fw_ready = 1; + /* * FUNC_INIT is required for SD8688 WLAN/BT multiple functions */ @@ -839,7 +844,6 @@ static void if_sdio_finish_power_on(struct if_sdio_card *card) netdev_alert(priv->dev, "CMD_FUNC_INIT cmd failed\n"); } - priv->fw_ready = 1; wake_up(&card->pwron_waitq); if (!card->started) { From 1111eae90fb64a9d9ed133e410712f1e34fdce4a Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Wed, 27 Feb 2013 11:37:48 -0800 Subject: [PATCH 055/261] eCryptfs: Fix redundant error check on ecryptfs_find_daemon_by_euid() It is sufficient to check the return code of ecryptfs_find_daemon_by_euid(). If it returns 0, it always sets the daemon pointer to point to a valid ecryptfs_daemon. Signed-off-by: Tyler Hicks Reported-by: Kees Cook --- fs/ecryptfs/messaging.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index d5c7297c5816..474051bd8d92 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -283,7 +283,7 @@ ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type, int rc; rc = ecryptfs_find_daemon_by_euid(&daemon); - if (rc || !daemon) { + if (rc) { rc = -ENOTCONN; goto out; } From 726bc6b092da4c093eb74d13c07184b18c1af0f1 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 27 Feb 2013 10:57:31 +0000 Subject: [PATCH 056/261] net/sctp: Validate parameter size for SCTP_GET_ASSOC_STATS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building sctp may fail with: In function ‘copy_from_user’, inlined from ‘sctp_getsockopt_assoc_stats’ at net/sctp/socket.c:5656:20: arch/x86/include/asm/uaccess_32.h:211:26: error: call to ‘copy_from_user_overflow’ declared with attribute error: copy_from_user() buffer size is not provably correct if built with W=1 due to a missing parameter size validation before the call to copy_from_user. Signed-off-by: Guenter Roeck Acked-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/sctp/socket.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index cedd9bf67b8c..9ef5c7312e12 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5653,6 +5653,9 @@ static int sctp_getsockopt_assoc_stats(struct sock *sk, int len, if (len < sizeof(sctp_assoc_t)) return -EINVAL; + /* Allow the struct to grow and fill in as much as possible */ + len = min_t(size_t, len, sizeof(sas)); + if (copy_from_user(&sas, optval, len)) return -EFAULT; @@ -5686,9 +5689,6 @@ static int sctp_getsockopt_assoc_stats(struct sock *sk, int len, /* Mark beginning of a new observation period */ asoc->stats.max_obs_rto = asoc->rto_min; - /* Allow the struct to grow and fill in as much as possible */ - len = min_t(size_t, len, sizeof(sas)); - if (put_user(len, optlen)) return -EFAULT; From 51acbcec6c42b24482bac18e42befc822524535d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 28 Feb 2013 09:08:34 +1100 Subject: [PATCH 057/261] md: remove CONFIG_MULTICORE_RAID456 This doesn't seem to actually help and we have an alternate multi-threading approach waiting in the wings, so just get rid of this config option and associated code. As a bonus, we remove one use of CONFIG_EXPERIMENTAL Cc: Dan Williams Cc: Kees Cook Signed-off-by: NeilBrown --- drivers/md/Kconfig | 11 ----------- drivers/md/raid5.c | 38 +------------------------------------- 2 files changed, 1 insertion(+), 48 deletions(-) diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 91a02eeeb319..9a10313d0670 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -154,17 +154,6 @@ config MD_RAID456 If unsure, say Y. -config MULTICORE_RAID456 - bool "RAID-4/RAID-5/RAID-6 Multicore processing (EXPERIMENTAL)" - depends on MD_RAID456 - depends on SMP - depends on EXPERIMENTAL - ---help--- - Enable the raid456 module to dispatch per-stripe raid operations to a - thread pool. - - If unsure, say N. 
- config MD_MULTIPATH tristate "Multipath I/O support" depends on BLK_DEV_MD diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 19d77a026639..35031c8b2d02 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1406,7 +1406,7 @@ static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu &sh->ops.zero_sum_result, percpu->spare_page, &submit); } -static void __raid_run_ops(struct stripe_head *sh, unsigned long ops_request) +static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) { int overlap_clear = 0, i, disks = sh->disks; struct dma_async_tx_descriptor *tx = NULL; @@ -1471,36 +1471,6 @@ static void __raid_run_ops(struct stripe_head *sh, unsigned long ops_request) put_cpu(); } -#ifdef CONFIG_MULTICORE_RAID456 -static void async_run_ops(void *param, async_cookie_t cookie) -{ - struct stripe_head *sh = param; - unsigned long ops_request = sh->ops.request; - - clear_bit_unlock(STRIPE_OPS_REQ_PENDING, &sh->state); - wake_up(&sh->ops.wait_for_ops); - - __raid_run_ops(sh, ops_request); - release_stripe(sh); -} - -static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) -{ - /* since handle_stripe can be called outside of raid5d context - * we need to ensure sh->ops.request is de-staged before another - * request arrives - */ - wait_event(sh->ops.wait_for_ops, - !test_and_set_bit_lock(STRIPE_OPS_REQ_PENDING, &sh->state)); - sh->ops.request = ops_request; - - atomic_inc(&sh->count); - async_schedule(async_run_ops, sh); -} -#else -#define raid_run_ops __raid_run_ops -#endif - static int grow_one_stripe(struct r5conf *conf) { struct stripe_head *sh; @@ -1509,9 +1479,6 @@ static int grow_one_stripe(struct r5conf *conf) return 0; sh->raid_conf = conf; - #ifdef CONFIG_MULTICORE_RAID456 - init_waitqueue_head(&sh->ops.wait_for_ops); - #endif spin_lock_init(&sh->stripe_lock); @@ -1630,9 +1597,6 @@ static int resize_stripes(struct r5conf *conf, int newsize) break; nsh->raid_conf = conf; - #ifdef CONFIG_MULTICORE_RAID456 - init_waitqueue_head(&nsh->ops.wait_for_ops); - #endif spin_lock_init(&nsh->stripe_lock); list_add(&nsh->lru, &newstripes); From f3378b48705154b9089affb2d2e939622aea68f1 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 28 Feb 2013 11:59:03 +1100 Subject: [PATCH 058/261] md: expedite metadata update when switching read-auto -> active If something has failed while the array was read-auto, then when we switch to 'active' we need to update the metadata. This will happen anyway but it is good to expedite it, and also to ensure any failed device has been released by the underlying device before we try to action the ioctl which caused us to switch to 'active' mode. Reported-by: Joe Lawrence Signed-off-by: NeilBrown --- drivers/md/md.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index f363135144f6..fcb878f88796 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6532,7 +6532,17 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, mddev->ro = 0; sysfs_notify_dirent_safe(mddev->sysfs_state); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - md_wakeup_thread(mddev->thread); + /* mddev_unlock will wake thread */ + /* If a device failed while we were read-only, we + * need to make sure the metadata is updated now. 
+ */ + if (test_bit(MD_CHANGE_DEVS, &mddev->flags)) { + mddev_unlock(mddev); + wait_event(mddev->sb_wait, + !test_bit(MD_CHANGE_DEVS, &mddev->flags) && + !test_bit(MD_CHANGE_PENDING, &mddev->flags)); + mddev_lock(mddev); + } } else { err = -EROFS; goto abort_unlock; From 4cd5d1115c2f752ca94a0eb461b36d88fb37ed1e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 28 Feb 2013 20:00:43 +0100 Subject: [PATCH 059/261] irq: Don't re-enable interrupts at the end of irq_exit Commit 74eed0163d0def3fce27228d9ccf3d36e207b286 "irq: Ensure irq_exit() code runs with interrupts disabled" restore interrupts flags in the end of irq_exit() for archs that don't define __ARCH_IRQ_EXIT_IRQS_DISABLED. However always returning from irq_exit() with interrupts disabled should not be a problem for these archs. Prior to this commit this was already happening anytime we processed pending softirqs anyway. Suggested-by: Linus Torvalds Signed-off-by: Frederic Weisbecker Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Paul E. McKenney --- kernel/softirq.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index f42ff97e1f8f..dce38fac4f32 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -334,9 +334,7 @@ static inline void invoke_softirq(void) void irq_exit(void) { #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED - unsigned long flags; - - local_irq_save(flags); + local_irq_disable(); #else WARN_ON_ONCE(!irqs_disabled()); #endif @@ -353,9 +351,6 @@ void irq_exit(void) tick_nohz_irq_exit(); #endif rcu_irq_exit(); -#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED - local_irq_restore(flags); -#endif } /* From 70fc69bc5a54d9776ace7c99d46eb533f8fb6e89 Mon Sep 17 00:00:00 2001 From: "Lee A. Roberts" Date: Thu, 28 Feb 2013 04:37:27 +0000 Subject: [PATCH 060/261] sctp: fix association hangs due to off-by-one errors in sctp_tsnmap_grow() In sctp_tsnmap_mark(), correct off-by-one error when calculating size value for sctp_tsnmap_grow(). In sctp_tsnmap_grow(), correct off-by-one error when copying and resizing the tsnmap. If max_tsn_seen is in the LSB of the word, this bit can be lost, causing the corresponding packet to be transmitted again and to be entered as a duplicate into the SCTP reassembly/ordering queues. Change parameter name from "gap" (zero-based index) to "size" (one-based) to enhance code readability. Signed-off-by: Lee A. Roberts Acked-by: Vlad Yasevich Acked-by: Neil Horman --- net/sctp/tsnmap.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index 5f25e0c92c31..396c45174e5b 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -51,7 +51,7 @@ static void sctp_tsnmap_update(struct sctp_tsnmap *map); static void sctp_tsnmap_find_gap_ack(unsigned long *map, __u16 off, __u16 len, __u16 *start, __u16 *end); -static int sctp_tsnmap_grow(struct sctp_tsnmap *map, u16 gap); +static int sctp_tsnmap_grow(struct sctp_tsnmap *map, u16 size); /* Initialize a block of memory as a tsnmap. 
*/ struct sctp_tsnmap *sctp_tsnmap_init(struct sctp_tsnmap *map, __u16 len, @@ -124,7 +124,7 @@ int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn, gap = tsn - map->base_tsn; - if (gap >= map->len && !sctp_tsnmap_grow(map, gap)) + if (gap >= map->len && !sctp_tsnmap_grow(map, gap + 1)) return -ENOMEM; if (!sctp_tsnmap_has_gap(map) && gap == 0) { @@ -360,23 +360,24 @@ __u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map, return ngaps; } -static int sctp_tsnmap_grow(struct sctp_tsnmap *map, u16 gap) +static int sctp_tsnmap_grow(struct sctp_tsnmap *map, u16 size) { unsigned long *new; unsigned long inc; u16 len; - if (gap >= SCTP_TSN_MAP_SIZE) + if (size > SCTP_TSN_MAP_SIZE) return 0; - inc = ALIGN((gap - map->len),BITS_PER_LONG) + SCTP_TSN_MAP_INCREMENT; + inc = ALIGN((size - map->len), BITS_PER_LONG) + SCTP_TSN_MAP_INCREMENT; len = min_t(u16, map->len + inc, SCTP_TSN_MAP_SIZE); new = kzalloc(len>>3, GFP_ATOMIC); if (!new) return 0; - bitmap_copy(new, map->tsn_map, map->max_tsn_seen - map->base_tsn); + bitmap_copy(new, map->tsn_map, + map->max_tsn_seen - map->cumulative_tsn_ack_point); kfree(map->tsn_map); map->tsn_map = new; map->len = len; From e67f85ecd83de66d4f25f2e0f90bb0d01a52ddd8 Mon Sep 17 00:00:00 2001 From: "Lee A. Roberts" Date: Thu, 28 Feb 2013 04:37:28 +0000 Subject: [PATCH 061/261] sctp: fix association hangs due to reneging packets below the cumulative TSN ACK point In sctp_ulpq_renege_list(), do not renege packets below the cumulative TSN ACK point. Signed-off-by: Lee A. Roberts Acked-by: Vlad Yasevich Acked-by: Neil Horman --- net/sctp/ulpqueue.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index ada17464b65b..63afddcbcd2c 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -969,11 +969,16 @@ static __u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq, tsnmap = &ulpq->asoc->peer.tsn_map; - while ((skb = __skb_dequeue_tail(list)) != NULL) { - freed += skb_headlen(skb); + while ((skb = skb_peek_tail(list)) != NULL) { event = sctp_skb2event(skb); tsn = event->tsn; + /* Don't renege below the Cumulative TSN ACK Point. */ + if (TSN_lte(tsn, sctp_tsnmap_get_ctsn(tsnmap))) + break; + + __skb_unlink(skb, list); + freed += skb_headlen(skb); sctp_ulpevent_free(event); sctp_tsnmap_renege(tsnmap, tsn); if (freed >= needed) From 95ac7b859f508b1b3e6adf7dce307864e4384a69 Mon Sep 17 00:00:00 2001 From: "Lee A. Roberts" Date: Thu, 28 Feb 2013 04:37:29 +0000 Subject: [PATCH 062/261] sctp: fix association hangs due to errors when reneging events from the ordering queue In sctp_ulpq_renege_list(), events being reneged from the ordering queue may correspond to multiple TSNs. Identify all affected packets; sum freed space and renege from the tsnmap. Signed-off-by: Lee A. 
Roberts Acked-by: Vlad Yasevich Acked-by: Neil Horman --- net/sctp/ulpqueue.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 63afddcbcd2c..f221fbbc80ac 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -962,8 +962,8 @@ static __u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq, struct sk_buff_head *list, __u16 needed) { __u16 freed = 0; - __u32 tsn; - struct sk_buff *skb; + __u32 tsn, last_tsn; + struct sk_buff *skb, *flist, *last; struct sctp_ulpevent *event; struct sctp_tsnmap *tsnmap; @@ -977,10 +977,28 @@ static __u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq, if (TSN_lte(tsn, sctp_tsnmap_get_ctsn(tsnmap))) break; - __skb_unlink(skb, list); + /* Events in ordering queue may have multiple fragments + * corresponding to additional TSNs. Sum the total + * freed space; find the last TSN. + */ freed += skb_headlen(skb); + flist = skb_shinfo(skb)->frag_list; + for (last = flist; flist; flist = flist->next) { + last = flist; + freed += skb_headlen(last); + } + if (last) + last_tsn = sctp_skb2event(last)->tsn; + else + last_tsn = tsn; + + /* Unlink the event, then renege all applicable TSNs. */ + __skb_unlink(skb, list); sctp_ulpevent_free(event); - sctp_tsnmap_renege(tsnmap, tsn); + while (TSN_lte(tsn, last_tsn)) { + sctp_tsnmap_renege(tsnmap, tsn); + tsn++; + } if (freed >= needed) return freed; } From d003b41b801124b96337973b01eada6a83673d23 Mon Sep 17 00:00:00 2001 From: "Lee A. Roberts" Date: Thu, 28 Feb 2013 04:37:30 +0000 Subject: [PATCH 063/261] sctp: fix association hangs due to partial delivery errors In sctp_ulpq_tail_data(), use return values 0,1 to indicate whether a complete event (with MSG_EOR set) was delivered. A return value of -ENOMEM continues to indicate an out-of-memory condition was encountered. In sctp_ulpq_retrieve_partial() and sctp_ulpq_retrieve_first(), correct message reassembly logic for SCTP partial delivery. Change logic to ensure that as much data as possible is sent with the initial partial delivery and that following partial deliveries contain all available data. In sctp_ulpq_partial_delivery(), attempt partial delivery only if the data on the head of the reassembly queue is at or before the cumulative TSN ACK point. In sctp_ulpq_renege(), use the modified return values from sctp_ulpq_tail_data() to choose whether to attempt partial delivery or to attempt to drain the reassembly queue as a means to reduce memory pressure. Remove call to sctp_tsnmap_mark(), as this is handled correctly in call to sctp_ulpq_tail_data(). Signed-off-by: Lee A. Roberts Acked-by: Vlad Yasevich Acked-by: Neil Horman --- net/sctp/ulpqueue.c | 54 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index f221fbbc80ac..0fd5b3d2df03 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -106,6 +106,7 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, { struct sk_buff_head temp; struct sctp_ulpevent *event; + int event_eor = 0; /* Create an event from the incoming chunk. */ event = sctp_ulpevent_make_rcvmsg(chunk->asoc, chunk, gfp); @@ -127,10 +128,12 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, /* Send event to the ULP. 'event' is the sctp_ulpevent for * very first SKB on the 'temp' list. */ - if (event) + if (event) { + event_eor = (event->msg_flags & MSG_EOR) ? 
1 : 0; sctp_ulpq_tail_event(ulpq, event); + } - return 0; + return event_eor; } /* Add a new event for propagation to the ULP. */ @@ -540,14 +543,19 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq *ulpq) ctsn = cevent->tsn; switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) { + case SCTP_DATA_FIRST_FRAG: + if (!first_frag) + return NULL; + goto done; case SCTP_DATA_MIDDLE_FRAG: if (!first_frag) { first_frag = pos; next_tsn = ctsn + 1; last_frag = pos; - } else if (next_tsn == ctsn) + } else if (next_tsn == ctsn) { next_tsn++; - else + last_frag = pos; + } else goto done; break; case SCTP_DATA_LAST_FRAG: @@ -651,6 +659,14 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *ulpq) } else goto done; break; + + case SCTP_DATA_LAST_FRAG: + if (!first_frag) + return NULL; + else + goto done; + break; + default: return NULL; } @@ -1025,16 +1041,28 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event; struct sctp_association *asoc; struct sctp_sock *sp; + __u32 ctsn; + struct sk_buff *skb; asoc = ulpq->asoc; sp = sctp_sk(asoc->base.sk); /* If the association is already in Partial Delivery mode - * we have noting to do. + * we have nothing to do. */ if (ulpq->pd_mode) return; + /* Data must be at or below the Cumulative TSN ACK Point to + * start partial delivery. + */ + skb = skb_peek(&asoc->ulpq.reasm); + if (skb != NULL) { + ctsn = sctp_skb2event(skb)->tsn; + if (!TSN_lte(ctsn, sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map))) + return; + } + /* If the user enabled fragment interleave socket option, * multiple associations can enter partial delivery. * Otherwise, we can only enter partial delivery if the @@ -1077,12 +1105,16 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, } /* If able to free enough room, accept this chunk. */ if (chunk && (freed >= needed)) { - __u32 tsn; - tsn = ntohl(chunk->subh.data_hdr->tsn); - sctp_tsnmap_mark(&asoc->peer.tsn_map, tsn, chunk->transport); - sctp_ulpq_tail_data(ulpq, chunk, gfp); - - sctp_ulpq_partial_delivery(ulpq, gfp); + int retval; + retval = sctp_ulpq_tail_data(ulpq, chunk, gfp); + /* + * Enter partial delivery if chunk has not been + * delivered; otherwise, drain the reassembly queue. + */ + if (retval <= 0) + sctp_ulpq_partial_delivery(ulpq, gfp); + else if (retval == 1) + sctp_ulpq_reasm_drain(ulpq); } sk_mem_reclaim(asoc->base.sk); From 79ffef1fe213851f44bfccf037170a140e929f85 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Feb 2013 07:05:03 +0000 Subject: [PATCH 064/261] tcp: avoid wakeups for pure ACK TCP prequeue mechanism purpose is to let incoming packets being processed by the thread currently blocked in tcp_recvmsg(), instead of behalf of the softirq handler, to better adapt flow control on receiver host capacity to schedule the consumer. But in typical request/answer workloads, we send request, then block to receive the answer. And before the actual answer, TCP stack receives the ACK packets acknowledging the request. Processing pure ACK on behalf of the thread blocked in tcp_recvmsg() is a waste of resources, as thread has to immediately sleep again because it got no payload. This patch avoids the extra context switches and scheduler overhead. 
Before patch : a:~# echo 0 >/proc/sys/net/ipv4/tcp_low_latency a:~# perf stat ./super_netperf 300 -t TCP_RR -l 10 -H 7.7.7.84 -- -r 8k,8k 231676 Performance counter stats for './super_netperf 300 -t TCP_RR -l 10 -H 7.7.7.84 -- -r 8k,8k': 116251.501765 task-clock # 11.369 CPUs utilized 5,025,463 context-switches # 0.043 M/sec 1,074,511 CPU-migrations # 0.009 M/sec 216,923 page-faults # 0.002 M/sec 311,636,972,396 cycles # 2.681 GHz 260,507,138,069 stalled-cycles-frontend # 83.59% frontend cycles idle 155,590,092,840 stalled-cycles-backend # 49.93% backend cycles idle 100,101,255,411 instructions # 0.32 insns per cycle # 2.60 stalled cycles per insn 16,535,930,999 branches # 142.243 M/sec 646,483,591 branch-misses # 3.91% of all branches 10.225482774 seconds time elapsed After patch : a:~# echo 0 >/proc/sys/net/ipv4/tcp_low_latency a:~# perf stat ./super_netperf 300 -t TCP_RR -l 10 -H 7.7.7.84 -- -r 8k,8k 233297 Performance counter stats for './super_netperf 300 -t TCP_RR -l 10 -H 7.7.7.84 -- -r 8k,8k': 91084.870855 task-clock # 8.887 CPUs utilized 2,485,916 context-switches # 0.027 M/sec 815,520 CPU-migrations # 0.009 M/sec 216,932 page-faults # 0.002 M/sec 245,195,022,629 cycles # 2.692 GHz 202,635,777,041 stalled-cycles-frontend # 82.64% frontend cycles idle 124,280,372,407 stalled-cycles-backend # 50.69% backend cycles idle 83,457,289,618 instructions # 0.34 insns per cycle # 2.43 stalled cycles per insn 13,431,472,361 branches # 147.461 M/sec 504,470,665 branch-misses # 3.76% of all branches 10.249594448 seconds time elapsed Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Tom Herbert Cc: Yuchung Cheng Cc: Andi Kleen Signed-off-by: David S. Miller --- include/net/tcp.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/net/tcp.h b/include/net/tcp.h index 23f2e98d4b65..cf0694d4ad60 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1045,6 +1045,10 @@ static inline bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) if (sysctl_tcp_low_latency || !tp->ucopy.task) return false; + if (skb->len <= tcp_hdrlen(skb) && + skb_queue_len(&tp->ucopy.prequeue) == 0) + return false; + __skb_queue_tail(&tp->ucopy.prequeue, skb); tp->ucopy.memory += skb->truesize; if (tp->ucopy.memory > sk->sk_rcvbuf) { From faf1e7857a1b87cd8baf48c3e962142e21ad417c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?fran=C3=A7ois=20romieu?= Date: Wed, 27 Feb 2013 13:01:57 +0000 Subject: [PATCH 065/261] r8169: honor jumbo settings when chipset is requested to start. Some hardware start settings implicitly assume a usual 1500-byte MTU that can't be guaranteed because changes of MTU may be requested both before and after the hardware is started. Reported-by: Tomi Orava Signed-off-by: Francois Romieu Cc: Hayes Wang Signed-off-by: David S.
Miller --- drivers/net/ethernet/realtek/r8169.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 8900398ba103..28fb50a1e9c3 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -4765,8 +4765,10 @@ static void rtl_hw_start_8168bb(struct rtl8169_private *tp) RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK); - rtl_tx_performance_tweak(pdev, - (0x5 << MAX_READ_REQUEST_SHIFT) | PCI_EXP_DEVCTL_NOSNOOP_EN); + if (tp->dev->mtu <= ETH_DATA_LEN) { + rtl_tx_performance_tweak(pdev, (0x5 << MAX_READ_REQUEST_SHIFT) | + PCI_EXP_DEVCTL_NOSNOOP_EN); + } } static void rtl_hw_start_8168bef(struct rtl8169_private *tp) @@ -4789,7 +4791,8 @@ static void __rtl_hw_start_8168cp(struct rtl8169_private *tp) RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en); - rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT); + if (tp->dev->mtu <= ETH_DATA_LEN) + rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT); rtl_disable_clock_request(pdev); @@ -4822,7 +4825,8 @@ static void rtl_hw_start_8168cp_2(struct rtl8169_private *tp) RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en); - rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT); + if (tp->dev->mtu <= ETH_DATA_LEN) + rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT); RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK); } @@ -4841,7 +4845,8 @@ static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp) RTL_W8(MaxTxPacketSize, TxPacketMax); - rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT); + if (tp->dev->mtu <= ETH_DATA_LEN) + rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT); RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK); } @@ -4901,7 +4906,8 @@ static void rtl_hw_start_8168d(struct rtl8169_private *tp) RTL_W8(MaxTxPacketSize, TxPacketMax); - rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT); + if (tp->dev->mtu <= ETH_DATA_LEN) + rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT); RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK); } @@ -4913,7 +4919,8 @@ static void rtl_hw_start_8168dp(struct rtl8169_private *tp) rtl_csi_access_enable_1(tp); - rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT); + if (tp->dev->mtu <= ETH_DATA_LEN) + rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT); RTL_W8(MaxTxPacketSize, TxPacketMax); @@ -4972,7 +4979,8 @@ static void rtl_hw_start_8168e_1(struct rtl8169_private *tp) rtl_ephy_init(tp, e_info_8168e_1, ARRAY_SIZE(e_info_8168e_1)); - rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT); + if (tp->dev->mtu <= ETH_DATA_LEN) + rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT); RTL_W8(MaxTxPacketSize, TxPacketMax); @@ -4998,7 +5006,8 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp) rtl_ephy_init(tp, e_info_8168e_2, ARRAY_SIZE(e_info_8168e_2)); - rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT); + if (tp->dev->mtu <= ETH_DATA_LEN) + rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT); rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC); rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC); From 8ce7684533013d0a0dfbd627ed0430ecb0c82213 Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Wed, 27 Feb 2013 13:06:44 +0000 Subject: [PATCH 066/261] bnx2x: Fix port identification for the 84834 Fix the "ethtool -p" for boards with BCM84834, by using LED4 
of the PHY to toggle the link LED while keeping interrupt disabled to avoid NIG attentions, and at the end restore NIG to previous state. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnx2x/bnx2x_link.c | 60 +++++++++++++++++++ .../net/ethernet/broadcom/bnx2x/bnx2x_link.h | 3 +- 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index 1663e0b6b5a0..4719ab1a31e3 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -10422,6 +10422,28 @@ static void bnx2x_848xx_set_link_led(struct bnx2x_phy *phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_8481_LED1_MASK, 0x0); + if (phy->type == + PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84834) { + /* Disable MI_INT interrupt before setting LED4 + * source to constant off. + */ + if (REG_RD(bp, NIG_REG_MASK_INTERRUPT_PORT0 + + params->port*4) & + NIG_MASK_MI_INT) { + params->link_flags |= + LINK_FLAGS_INT_DISABLED; + + bnx2x_bits_dis( + bp, + NIG_REG_MASK_INTERRUPT_PORT0 + + params->port*4, + NIG_MASK_MI_INT); + } + bnx2x_cl45_write(bp, phy, + MDIO_PMA_DEVAD, + MDIO_PMA_REG_8481_SIGNAL_MASK, + 0x0); + } } break; case LED_MODE_ON: @@ -10468,6 +10490,28 @@ static void bnx2x_848xx_set_link_led(struct bnx2x_phy *phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_8481_LED1_MASK, 0x20); + if (phy->type == + PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84834) { + /* Disable MI_INT interrupt before setting LED4 + * source to constant on. + */ + if (REG_RD(bp, NIG_REG_MASK_INTERRUPT_PORT0 + + params->port*4) & + NIG_MASK_MI_INT) { + params->link_flags |= + LINK_FLAGS_INT_DISABLED; + + bnx2x_bits_dis( + bp, + NIG_REG_MASK_INTERRUPT_PORT0 + + params->port*4, + NIG_MASK_MI_INT); + } + bnx2x_cl45_write(bp, phy, + MDIO_PMA_DEVAD, + MDIO_PMA_REG_8481_SIGNAL_MASK, + 0x20); + } } break; @@ -10532,6 +10576,22 @@ static void bnx2x_848xx_set_link_led(struct bnx2x_phy *phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_8481_LINK_SIGNAL, val); + if (phy->type == + PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84834) { + /* Restore LED4 source to external link, + * and re-enable interrupts. + */ + bnx2x_cl45_write(bp, phy, + MDIO_PMA_DEVAD, + MDIO_PMA_REG_8481_SIGNAL_MASK, + 0x40); + if (params->link_flags & + LINK_FLAGS_INT_DISABLED) { + bnx2x_link_int_enable(params); + params->link_flags &= + ~LINK_FLAGS_INT_DISABLED; + } + } } break; } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h index d25c7d79787a..be5c195d03dd 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h @@ -307,7 +307,8 @@ struct link_params { struct bnx2x *bp; u16 req_fc_auto_adv; /* Should be set to TX / BOTH when req_flow_ctrl is set to AUTO */ - u16 rsrv1; + u16 link_flags; +#define LINK_FLAGS_INT_DISABLED (1<<0) u32 lfa_base; }; From be94bea753a4d5ac0982df07aaeaf0cb1f7554dd Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Wed, 27 Feb 2013 13:06:45 +0000 Subject: [PATCH 067/261] bnx2x: Fix KR2 link Fix KR2 link down problem after reboot when link speed is reconfigured via ethtool. Since 1G/10G support link speed were missing by default, 1G/10G link speed were not advertised. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 6 ++++++ drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c | 2 ++ 2 files changed, 8 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index 9a674b14b403..edfa67adf2f9 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -281,6 +281,8 @@ static int bnx2x_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) cmd->lp_advertising |= ADVERTISED_2500baseX_Full; if (status & LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE) cmd->lp_advertising |= ADVERTISED_10000baseT_Full; + if (status & LINK_STATUS_LINK_PARTNER_20GXFD_CAPABLE) + cmd->lp_advertising |= ADVERTISED_20000baseKR2_Full; } cmd->maxtxpkt = 0; @@ -463,6 +465,10 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) ADVERTISED_10000baseKR_Full)) bp->link_params.speed_cap_mask[cfg_idx] |= PORT_HW_CFG_SPEED_CAPABILITY_D0_10G; + + if (cmd->advertising & ADVERTISED_20000baseKR2_Full) + bp->link_params.speed_cap_mask[cfg_idx] |= + PORT_HW_CFG_SPEED_CAPABILITY_D0_20G; } } else { /* forced speed */ /* advertise the requested speed and duplex if supported */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index 4719ab1a31e3..bf69c5306e46 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -11851,6 +11851,8 @@ static int bnx2x_populate_int_phy(struct bnx2x *bp, u32 shmem_base, u8 port, phy->media_type = ETH_PHY_KR; phy->flags |= FLAGS_WC_DUAL_MODE; phy->supported &= (SUPPORTED_20000baseKR2_Full | + SUPPORTED_10000baseT_Full | + SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg | SUPPORTED_FIBRE | SUPPORTED_Pause | From d521de04a73abb5e662c12eafa8c839aaaa6ae4f Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Wed, 27 Feb 2013 13:06:46 +0000 Subject: [PATCH 068/261] bnx2x: Fix KR2 work-around condition Fix condition typo for running KR2 work-around though it doesn't have real effect since the typo bits matched by chance. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index bf69c5306e46..31c5787970db 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -13499,7 +13499,7 @@ void bnx2x_period_func(struct link_params *params, struct link_vars *vars) struct bnx2x_phy *phy = ¶ms->phy[INT_PHY]; bnx2x_set_aer_mmd(params, phy); if ((phy->supported & SUPPORTED_20000baseKR2_Full) && - (phy->speed_cap_mask & SPEED_20000)) + (phy->speed_cap_mask & PORT_HW_CFG_SPEED_CAPABILITY_D0_20G)) bnx2x_check_kr2_wa(params, vars, phy); bnx2x_check_over_curr(params, vars); if (vars->rx_tx_asic_rst) From b2a431915d19893f047e0dd149d0c1b9d2a0b960 Mon Sep 17 00:00:00 2001 From: Petr Malat Date: Thu, 28 Feb 2013 01:01:52 +0000 Subject: [PATCH 069/261] phy: Fix phy_device_free memory leak Fix memory leak in phy_device_free() for the case when phy_device* returned by phy_device_create() is not registered in the system. 
Bug description: phy_device_create() sets the name of the kobject using dev_set_name(), which allocates memory using kvasprintf(), but this memory isn't freed if the underlying device isn't registered properly, because kobject_cleanup() is not called in that case. This can happen (and actually is happening on our machines) if phy_device_register(), called by mdiobus_scan(), fails. Patch description: Embedded struct device is initialized in phy_device_create() and its counterpart phy_device_free() just drops one reference to the device, which leads to proper deinitialization including releasing the kobject name memory. Signed-off-by: Petr Malat Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 9930f9999561..3657b4a29124 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -44,13 +44,13 @@ MODULE_LICENSE("GPL"); void phy_device_free(struct phy_device *phydev) { - kfree(phydev); + put_device(&phydev->dev); } EXPORT_SYMBOL(phy_device_free); static void phy_device_release(struct device *dev) { - phy_device_free(to_phy_device(dev)); + kfree(to_phy_device(dev)); } static struct phy_driver genphy_driver; @@ -201,6 +201,8 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, there's no driver _already_ loaded. */ request_module(MDIO_MODULE_PREFIX MDIO_ID_FMT, MDIO_ID_ARGS(phy_id)); + device_initialize(&dev->dev); + return dev; } EXPORT_SYMBOL(phy_device_create); @@ -363,9 +365,9 @@ int phy_device_register(struct phy_device *phydev) /* Run all of the fixups for this PHY */ phy_scan_fixups(phydev); - err = device_register(&phydev->dev); + err = device_add(&phydev->dev); if (err) { - pr_err("phy %d failed to register\n", phydev->addr); + pr_err("PHY %d failed to add\n", phydev->addr); goto out; } From 02e711276d444343656c25b91b42996c62726712 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Thu, 28 Feb 2013 07:16:54 +0000 Subject: [PATCH 070/261] bgmac: omit the fcs Do not include the frame check sequence when adding the skb to netif_receive_skb(). This causes problems when this interface is bridged to a wifi AP and a big packet should be forwarded from this Ethernet driver through a bridge to the wifi client. Signed-off-by: Hauke Mehrtens Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bgmac.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index bf985c04524e..bce30e72dfde 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -301,12 +301,16 @@ static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring, bgmac_err(bgmac, "Found poisoned packet at slot %d, DMA issue!\n", ring->start); } else { + /* Omit CRC. */ + len -= ETH_FCS_LEN; + new_skb = netdev_alloc_skb_ip_align(bgmac->net_dev, len); if (new_skb) { skb_put(new_skb, len); skb_copy_from_linear_data_offset(skb, BGMAC_RX_FRAME_OFFSET, new_skb->data, len); + skb_checksum_none_assert(skb); new_skb->protocol = eth_type_trans(new_skb, bgmac->net_dev); netif_receive_skb(new_skb); From 32fcafbcd1c9f6c7013016a22a5369b4acb93577 Mon Sep 17 00:00:00 2001 From: Vlastimil Kosar Date: Thu, 28 Feb 2013 08:45:22 +0000 Subject: [PATCH 071/261] net/phy: micrel: Disable asymmetric pause for KSZ9021 Phyter KSZ9021 has a hardware bug.
If asymmetric pause is enabled, then it is necessary to disconnect and then reconnect the ethernet cable to get the phyter working. The solution is to disable the asymmetric pause. Signed-off-by: Vlastimil Kosar Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 29934446436a..abf7b6153d00 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -257,8 +257,7 @@ static struct phy_driver ksphy_driver[] = { .phy_id = PHY_ID_KSZ9021, .phy_id_mask = 0x000ffffe, .name = "Micrel KSZ9021 Gigabit PHY", - .features = (PHY_GBIT_FEATURES | SUPPORTED_Pause - | SUPPORTED_Asym_Pause), + .features = (PHY_GBIT_FEATURES | SUPPORTED_Pause), .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, .config_init = kszphy_config_init, .config_aneg = genphy_config_aneg, From 4ceb73ae5a09416e040bce8bcfa7218dc5149ad8 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 24 Feb 2013 19:26:25 -0800 Subject: [PATCH 072/261] regulator: db8500-prcmu - remove incorrect __exit markup Even if bus is not hot-pluggable, the devices can be unbound from the driver via sysfs, so we should not be using __exit annotations on remove() methods. The only exception is drivers registered with platform_driver_probe() which specifically disables sysfs bind/unbind attributes. Signed-off-by: Dmitry Torokhov Signed-off-by: Mark Brown --- drivers/regulator/db8500-prcmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/db8500-prcmu.c b/drivers/regulator/db8500-prcmu.c index 219d162b651e..a53c11a529d5 100644 --- a/drivers/regulator/db8500-prcmu.c +++ b/drivers/regulator/db8500-prcmu.c @@ -528,7 +528,7 @@ static int db8500_regulator_probe(struct platform_device *pdev) return 0; } -static int __exit db8500_regulator_remove(struct platform_device *pdev) +static int db8500_regulator_remove(struct platform_device *pdev) { int i; @@ -553,7 +553,7 @@ static struct platform_driver db8500_regulator_driver = { .owner = THIS_MODULE, }, .probe = db8500_regulator_probe, - .remove = __exit_p(db8500_regulator_remove), + .remove = db8500_regulator_remove, }; static int __init db8500_regulator_init(void) From 5838b032fd69ae47565ddc50062decf9055e1628 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Thu, 28 Feb 2013 18:12:47 -0600 Subject: [PATCH 073/261] regulator: core: update kernel documentation for regulator_desc commit df367931 (regulator: core: Provide regmap get/set bypass operations) introduced regulator_[gs]et_bypass_regmap However structure documentation for regulator_desc needs an update. 
./scripts/kernel-doc include/linux/regulator/driver.h >/dev/null generates: Warning(include/linux/regulator/driver.h:233): No description found for parameter 'bypass_reg' Warning(include/linux/regulator/driver.h:233): No description found for parameter 'bypass_mask' Cc: Liam Girdwood Cc: Mark Brown Cc: linux-kernel@vger.kernel.org Signed-off-by: Nishanth Menon Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 23070fd83872..7df93f52db08 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -199,6 +199,8 @@ enum regulator_type { * output when using regulator_set_voltage_sel_regmap * @enable_reg: Register for control when using regmap enable/disable ops * @enable_mask: Mask for control when using regmap enable/disable ops + * @bypass_reg: Register for control when using regmap set_bypass + * @bypass_mask: Mask for control when using regmap set_bypass * * @enable_time: Time taken for initial enable of regulator (in uS). */ From 9345dfb8495aa17ce7c575e1a96e5ad64def0b3d Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Thu, 28 Feb 2013 18:44:54 -0600 Subject: [PATCH 074/261] regulator: core: fix documentation error in regulator_allow_bypass commit f59c8f9f (regulator: core: Support bypass mode) has a short documentation error around the regulator_allow_bypass parameter 'enable' which is documented as 'allow'. This generates kernel-doc warning as follows: ./scripts/kernel-doc drivers/regulator/core.c >/dev/null Warning(drivers/regulator/core.c:2841): No description found for parameter 'enable' Warning(drivers/regulator/core.c:2841): Excess function parameter 'allow' description in 'regulator_allow_bypass' Cc: Liam Girdwood Cc: Mark Brown Cc: linux-kernel@vger.kernel.org Signed-off-by: Nishanth Menon Signed-off-by: Mark Brown --- drivers/regulator/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index da9782bd27d0..154bc8f0c1a0 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -2830,7 +2830,7 @@ EXPORT_SYMBOL_GPL(regulator_get_bypass_regmap); * regulator_allow_bypass - allow the regulator to go into bypass mode * * @regulator: Regulator to configure - * @allow: enable or disable bypass mode + * @enable: enable or disable bypass mode * * Allow the regulator to go into bypass mode if all other consumers * for the regulator also enable bypass mode and the machine From 283189d3be56aa6db6f192bb255df68493cd79ac Mon Sep 17 00:00:00 2001 From: Li Fei Date: Thu, 28 Feb 2013 15:37:11 +0800 Subject: [PATCH 075/261] regmap: irq: call pm_runtime_put in pm_runtime_get_sync failed case Even in failed case of pm_runtime_get_sync, the usage_count is incremented. In order to keep the usage_count with correct value and runtime power management to behave correctly, call pm_runtime_put(_sync) in such case. 
Signed-off-by Liu Chuansheng Signed-off-by: Li Fei Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-irq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index 4706c63d0bc6..020ea2b9fd2f 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -184,6 +184,7 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) if (ret < 0) { dev_err(map->dev, "IRQ thread failed to resume: %d\n", ret); + pm_runtime_put(map->dev); return IRQ_NONE; } } From a7dddf2757d09ba38683b359a721dc2efe85cd24 Mon Sep 17 00:00:00 2001 From: Graeme Gregory Date: Sat, 23 Feb 2013 16:35:40 +0000 Subject: [PATCH 076/261] regulator: palmas: fix number of SMPS voltages Number of voltages for SMPS regulators was off by one. Signed-off-by: Graeme Gregory Signed-off-by: Ian Lartey Acked-by: Laxman Dewangan Signed-off-by: Mark Brown --- drivers/regulator/palmas-regulator.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/palmas-regulator.c b/drivers/regulator/palmas-regulator.c index cde13bb5a8fb..39cf14606784 100644 --- a/drivers/regulator/palmas-regulator.c +++ b/drivers/regulator/palmas-regulator.c @@ -4,6 +4,7 @@ * Copyright 2011-2012 Texas Instruments Inc. * * Author: Graeme Gregory + * Author: Ian Lartey * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -156,7 +157,7 @@ static const struct regs_info palmas_regs_info[] = { * * So they are basically (maxV-minV)/stepV */ -#define PALMAS_SMPS_NUM_VOLTAGES 116 +#define PALMAS_SMPS_NUM_VOLTAGES 117 #define PALMAS_SMPS10_NUM_VOLTAGES 2 #define PALMAS_LDO_NUM_VOLTAGES 50 From 6949fbe5b2d7b73dfeddeb470a56385ca36b4827 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sat, 16 Feb 2013 10:09:54 +0800 Subject: [PATCH 077/261] regulator: twl: Convert twl4030ldo_ops to get_voltage_sel This fixes an inconsistent behavior between list_voltage() and get_voltage() because current implementation of get_voltage() does not check the case IS_UNSUP() is true. 
Signed-off-by: Axel Lin Signed-off-by: Mark Brown --- drivers/regulator/twl-regulator.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/regulator/twl-regulator.c b/drivers/regulator/twl-regulator.c index 74508cc62d67..f705d25b437c 100644 --- a/drivers/regulator/twl-regulator.c +++ b/drivers/regulator/twl-regulator.c @@ -471,24 +471,23 @@ twl4030ldo_set_voltage_sel(struct regulator_dev *rdev, unsigned selector) selector); } -static int twl4030ldo_get_voltage(struct regulator_dev *rdev) +static int twl4030ldo_get_voltage_sel(struct regulator_dev *rdev) { struct twlreg_info *info = rdev_get_drvdata(rdev); - int vsel = twlreg_read(info, TWL_MODULE_PM_RECEIVER, - VREG_VOLTAGE); + int vsel = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_VOLTAGE); if (vsel < 0) return vsel; vsel &= info->table_len - 1; - return LDO_MV(info->table[vsel]) * 1000; + return vsel; } static struct regulator_ops twl4030ldo_ops = { .list_voltage = twl4030ldo_list_voltage, .set_voltage_sel = twl4030ldo_set_voltage_sel, - .get_voltage = twl4030ldo_get_voltage, + .get_voltage_sel = twl4030ldo_get_voltage_sel, .enable = twl4030reg_enable, .disable = twl4030reg_disable, From b3df026ea230b233f5a4ebc7400033f7326fad12 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 26 Feb 2013 23:35:46 +0000 Subject: [PATCH 078/261] ASoC: wm8960: Correct register 0 and 1 defaults Signed-off-by: Mark Brown --- sound/soc/codecs/wm8960.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c index 9bb927325993..4cb49eb85508 100644 --- a/sound/soc/codecs/wm8960.c +++ b/sound/soc/codecs/wm8960.c @@ -53,8 +53,8 @@ * using 2 wire for device control, so we cache them instead. */ static const struct reg_default wm8960_reg_defaults[] = { - { 0x0, 0x0097 }, - { 0x1, 0x0097 }, + { 0x0, 0x00a7 }, + { 0x1, 0x00a7 }, { 0x2, 0x0000 }, { 0x3, 0x0000 }, { 0x4, 0x0000 }, From 44426de4d87682870b35a649b76586177113f5e7 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 26 Feb 2013 23:36:48 +0000 Subject: [PATCH 079/261] ASoC: wm8960: Fix ADC power bits Signed-off-by: Mark Brown --- sound/soc/codecs/wm8960.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c index 4cb49eb85508..a64b93425ae3 100644 --- a/sound/soc/codecs/wm8960.c +++ b/sound/soc/codecs/wm8960.c @@ -323,8 +323,8 @@ SND_SOC_DAPM_MIXER("Left Input Mixer", WM8960_POWER3, 5, 0, SND_SOC_DAPM_MIXER("Right Input Mixer", WM8960_POWER3, 4, 0, wm8960_rin, ARRAY_SIZE(wm8960_rin)), -SND_SOC_DAPM_ADC("Left ADC", "Capture", WM8960_POWER2, 3, 0), -SND_SOC_DAPM_ADC("Right ADC", "Capture", WM8960_POWER2, 2, 0), +SND_SOC_DAPM_ADC("Left ADC", "Capture", WM8960_POWER1, 3, 0), +SND_SOC_DAPM_ADC("Right ADC", "Capture", WM8960_POWER1, 2, 0), SND_SOC_DAPM_DAC("Left DAC", "Playback", WM8960_POWER2, 8, 0), SND_SOC_DAPM_DAC("Right DAC", "Playback", WM8960_POWER2, 7, 0), From c9b5669031a72026e41c4a7a094ac1efa4ef0ef5 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 15 Feb 2013 10:37:26 +0000 Subject: [PATCH 080/261] ASoC: wm5102: Correct OUT2 volume and switch names Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/wm5102.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm5102.c b/sound/soc/codecs/wm5102.c index ab69c83626cd..deb109709bf4 100644 --- a/sound/soc/codecs/wm5102.c +++ b/sound/soc/codecs/wm5102.c @@ -755,7 +755,7 @@ SOC_SINGLE("SPKDAT1 High Performance Switch", 
ARIZONA_OUTPUT_PATH_CONFIG_5L, SOC_DOUBLE_R("HPOUT1 Digital Switch", ARIZONA_DAC_DIGITAL_VOLUME_1L, ARIZONA_DAC_DIGITAL_VOLUME_1R, ARIZONA_OUT1L_MUTE_SHIFT, 1, 1), -SOC_DOUBLE_R("OUT2 Digital Switch", ARIZONA_DAC_DIGITAL_VOLUME_2L, +SOC_DOUBLE_R("HPOUT2 Digital Switch", ARIZONA_DAC_DIGITAL_VOLUME_2L, ARIZONA_DAC_DIGITAL_VOLUME_2R, ARIZONA_OUT2L_MUTE_SHIFT, 1, 1), SOC_SINGLE("EPOUT Digital Switch", ARIZONA_DAC_DIGITAL_VOLUME_3L, ARIZONA_OUT3L_MUTE_SHIFT, 1, 1), @@ -767,7 +767,7 @@ SOC_DOUBLE_R("SPKDAT1 Digital Switch", ARIZONA_DAC_DIGITAL_VOLUME_5L, SOC_DOUBLE_R_TLV("HPOUT1 Digital Volume", ARIZONA_DAC_DIGITAL_VOLUME_1L, ARIZONA_DAC_DIGITAL_VOLUME_1R, ARIZONA_OUT1L_VOL_SHIFT, 0xbf, 0, digital_tlv), -SOC_DOUBLE_R_TLV("OUT2 Digital Volume", ARIZONA_DAC_DIGITAL_VOLUME_2L, +SOC_DOUBLE_R_TLV("HPOUT2 Digital Volume", ARIZONA_DAC_DIGITAL_VOLUME_2L, ARIZONA_DAC_DIGITAL_VOLUME_2R, ARIZONA_OUT2L_VOL_SHIFT, 0xbf, 0, digital_tlv), SOC_SINGLE_TLV("EPOUT Digital Volume", ARIZONA_DAC_DIGITAL_VOLUME_3L, From 5752ec93f3a120f0d4088565989eaea27db7a0d8 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 15 Feb 2013 10:37:27 +0000 Subject: [PATCH 081/261] ASoC: wm5110: Correct OUT2/3 volume and switch names Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/wm5110.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c index a1631320b448..1a97263634e4 100644 --- a/sound/soc/codecs/wm5110.c +++ b/sound/soc/codecs/wm5110.c @@ -213,9 +213,9 @@ ARIZONA_MIXER_CONTROLS("SPKDAT2R", ARIZONA_OUT6RMIX_INPUT_1_SOURCE), SOC_SINGLE("HPOUT1 High Performance Switch", ARIZONA_OUTPUT_PATH_CONFIG_1L, ARIZONA_OUT1_OSR_SHIFT, 1, 0), -SOC_SINGLE("OUT2 High Performance Switch", ARIZONA_OUTPUT_PATH_CONFIG_2L, +SOC_SINGLE("HPOUT2 High Performance Switch", ARIZONA_OUTPUT_PATH_CONFIG_2L, ARIZONA_OUT2_OSR_SHIFT, 1, 0), -SOC_SINGLE("OUT3 High Performance Switch", ARIZONA_OUTPUT_PATH_CONFIG_3L, +SOC_SINGLE("HPOUT3 High Performance Switch", ARIZONA_OUTPUT_PATH_CONFIG_3L, ARIZONA_OUT3_OSR_SHIFT, 1, 0), SOC_SINGLE("Speaker High Performance Switch", ARIZONA_OUTPUT_PATH_CONFIG_4L, ARIZONA_OUT4_OSR_SHIFT, 1, 0), @@ -226,9 +226,9 @@ SOC_SINGLE("SPKDAT2 High Performance Switch", ARIZONA_OUTPUT_PATH_CONFIG_6L, SOC_DOUBLE_R("HPOUT1 Digital Switch", ARIZONA_DAC_DIGITAL_VOLUME_1L, ARIZONA_DAC_DIGITAL_VOLUME_1R, ARIZONA_OUT1L_MUTE_SHIFT, 1, 1), -SOC_DOUBLE_R("OUT2 Digital Switch", ARIZONA_DAC_DIGITAL_VOLUME_2L, +SOC_DOUBLE_R("HPOUT2 Digital Switch", ARIZONA_DAC_DIGITAL_VOLUME_2L, ARIZONA_DAC_DIGITAL_VOLUME_2R, ARIZONA_OUT2L_MUTE_SHIFT, 1, 1), -SOC_DOUBLE_R("OUT3 Digital Switch", ARIZONA_DAC_DIGITAL_VOLUME_3L, +SOC_DOUBLE_R("HPOUT3 Digital Switch", ARIZONA_DAC_DIGITAL_VOLUME_3L, ARIZONA_DAC_DIGITAL_VOLUME_3R, ARIZONA_OUT3L_MUTE_SHIFT, 1, 1), SOC_DOUBLE_R("Speaker Digital Switch", ARIZONA_DAC_DIGITAL_VOLUME_4L, ARIZONA_DAC_DIGITAL_VOLUME_4R, ARIZONA_OUT4L_MUTE_SHIFT, 1, 1), @@ -240,10 +240,10 @@ SOC_DOUBLE_R("SPKDAT2 Digital Switch", ARIZONA_DAC_DIGITAL_VOLUME_6L, SOC_DOUBLE_R_TLV("HPOUT1 Digital Volume", ARIZONA_DAC_DIGITAL_VOLUME_1L, ARIZONA_DAC_DIGITAL_VOLUME_1R, ARIZONA_OUT1L_VOL_SHIFT, 0xbf, 0, digital_tlv), -SOC_DOUBLE_R_TLV("OUT2 Digital Volume", ARIZONA_DAC_DIGITAL_VOLUME_2L, +SOC_DOUBLE_R_TLV("HPOUT2 Digital Volume", ARIZONA_DAC_DIGITAL_VOLUME_2L, ARIZONA_DAC_DIGITAL_VOLUME_2R, ARIZONA_OUT2L_VOL_SHIFT, 0xbf, 0, digital_tlv), -SOC_DOUBLE_R_TLV("OUT3 Digital Volume", ARIZONA_DAC_DIGITAL_VOLUME_3L, +SOC_DOUBLE_R_TLV("HPOUT3 Digital 
Volume", ARIZONA_DAC_DIGITAL_VOLUME_3L, ARIZONA_DAC_DIGITAL_VOLUME_3R, ARIZONA_OUT3L_VOL_SHIFT, 0xbf, 0, digital_tlv), SOC_DOUBLE_R_TLV("Speaker Digital Volume", ARIZONA_DAC_DIGITAL_VOLUME_4L, @@ -260,11 +260,11 @@ SOC_DOUBLE_R_RANGE_TLV("HPOUT1 Volume", ARIZONA_OUTPUT_PATH_CONFIG_1L, ARIZONA_OUTPUT_PATH_CONFIG_1R, ARIZONA_OUT1L_PGA_VOL_SHIFT, 0x34, 0x40, 0, ana_tlv), -SOC_DOUBLE_R_RANGE_TLV("OUT2 Volume", ARIZONA_OUTPUT_PATH_CONFIG_2L, +SOC_DOUBLE_R_RANGE_TLV("HPOUT2 Volume", ARIZONA_OUTPUT_PATH_CONFIG_2L, ARIZONA_OUTPUT_PATH_CONFIG_2R, ARIZONA_OUT2L_PGA_VOL_SHIFT, 0x34, 0x40, 0, ana_tlv), -SOC_DOUBLE_R_RANGE_TLV("OUT3 Volume", ARIZONA_OUTPUT_PATH_CONFIG_3L, +SOC_DOUBLE_R_RANGE_TLV("HPOUT3 Volume", ARIZONA_OUTPUT_PATH_CONFIG_3L, ARIZONA_OUTPUT_PATH_CONFIG_3R, ARIZONA_OUT3L_PGA_VOL_SHIFT, 0x34, 0x40, 0, ana_tlv), From fbe31057fafebdc2811a7101b8b4a0460f5417d1 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Fri, 1 Mar 2013 12:24:05 +0100 Subject: [PATCH 082/261] regulator: fixed regulator_bulk_enable unwinding code Unwinding code disables all successfully enabled regulators. Error is logged for every failed regulator. Signed-off-by: Andrzej Hajda Signed-off-by: Kyungmin Park Signed-off-by: Mark Brown --- drivers/regulator/core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index da9782bd27d0..4a7790c58257 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -3057,9 +3057,13 @@ int regulator_bulk_enable(int num_consumers, return 0; err: - pr_err("Failed to enable %s: %d\n", consumers[i].supply, ret); - while (--i >= 0) - regulator_disable(consumers[i].consumer); + for (i = 0; i < num_consumers; i++) { + if (consumers[i].ret < 0) + pr_err("Failed to enable %s: %d\n", consumers[i].supply, + consumers[i].ret); + else + regulator_disable(consumers[i].consumer); + } return ret; } From 8b82547e33e85fc24d4d172a93c796de1fefa81a Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 1 Mar 2013 05:02:02 +0000 Subject: [PATCH 083/261] l2tp: Restore socket refcount when sendmsg succeeds The sendmsg() syscall handler for PPPoL2TP doesn't decrease the socket reference counter after successful transmissions. Any successful sendmsg() call from userspace will then increase the reference counter forever, thus preventing the kernel's session and tunnel data from being freed later on. The problem only happens when writing directly on L2TP sockets. PPP sockets attached to L2TP are unaffected as the PPP subsystem uses pppol2tp_xmit() which symmetrically increase/decrease reference counters. This patch adds the missing call to sock_put() before returning from pppol2tp_sendmsg(). Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 3f4e3afc191a..6a53371dba1f 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -355,6 +355,7 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh l2tp_xmit_skb(session, skb, session->hdr_len); sock_put(ps->tunnel_sock); + sock_put(sk); return error; From d8c6f4b9b7848bca8babfc0ae43a50c8ab22fbb9 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 1 Mar 2013 07:44:08 +0000 Subject: [PATCH 084/261] ipv[4|6]: correct dropwatch false positive in local_deliver_finish I had a report recently of a user trying to use dropwatch to localise some frame loss, and they were getting false positives. 
Turned out they were using a user space SCTP stack that used raw sockets to grab frames. When we don't have a registered protocol for a given packet, we record it as a drop, even if a raw socket receives the frame. We should only record the drop in the event a raw socket doesn't exist to receive the frames. Tested by the reporter successfully. Signed-off-by: Neil Horman Reported-by: William Reich Tested-by: William Reich CC: "David S. Miller" CC: William Reich CC: eric.dumazet@gmail.com Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_input.c | 6 ++++-- net/ipv6/ip6_input.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 87abd3e2bd32..2bdf802e28e2 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -228,9 +228,11 @@ static int ip_local_deliver_finish(struct sk_buff *skb) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); } - } else + kfree_skb(skb); + } else { IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS); - kfree_skb(skb); + consume_skb(skb); + } } } out: diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 5b10414e619e..b1876e52091e 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -241,9 +241,11 @@ resubmit: icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_UNK_NEXTHDR, nhoff); } - } else + kfree_skb(skb); + } else { IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS); - kfree_skb(skb); + consume_skb(skb); + } } rcu_read_unlock(); return 0; From 81ce0dbc119fa31af21d02febde1cf923022d4d6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 28 Feb 2013 19:27:43 +0000 Subject: [PATCH 085/261] sctp: use the passed in gfp flags instead of GFP_KERNEL This patch doesn't change how the code works because in the current kernel gfp is always GFP_KERNEL. But gfp was obviously intended instead of GFP_KERNEL. Signed-off-by: Dan Carpenter Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/endpointola.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 73aad3d16a45..9a680c075141 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -155,7 +155,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* SCTP-AUTH extensions*/ INIT_LIST_HEAD(&ep->endpoint_shared_keys); - null_key = sctp_auth_shkey_create(0, GFP_KERNEL); + null_key = sctp_auth_shkey_create(0, gfp); if (!null_key) goto nomem; From bcabdef12da49878789464ad7239e97d83ea5ef5 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 15 Feb 2013 14:46:14 +0900 Subject: [PATCH 086/261] gpiolib: check descriptors validity before use Some functions dereferenced their GPIO descriptor argument without checking its validity first, potentially leading to an oops when given an invalid argument. This patch also makes gpio_get_value() more resilient when given an invalid GPIO, returning 0 instead of silently crashing. Signed-off-by: Alexandre Courbot Cc: Ryan Mallon Signed-off-by: Grant Likely --- drivers/gpio/gpiolib.c | 112 ++++++++++++++++++++++++----------------- 1 file changed, 65 insertions(+), 47 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index fff9786cdc64..1a8a7a8f803f 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -174,7 +174,7 @@ static int gpio_ensure_requested(struct gpio_desc *desc) /* caller holds gpio_lock *OR* gpio is marked as requested */ static struct gpio_chip *gpiod_to_chip(struct gpio_desc *desc) { - return desc->chip; + return desc ?
desc->chip : NULL; } struct gpio_chip *gpio_to_chip(unsigned gpio) @@ -654,6 +654,11 @@ static ssize_t export_store(struct class *class, goto done; desc = gpio_to_desc(gpio); + /* reject invalid GPIOs */ + if (!desc) { + pr_warn("%s: invalid GPIO %ld\n", __func__, gpio); + return -EINVAL; + } /* No extra locking here; FLAG_SYSFS just signifies that the * request and export were done by on behalf of userspace, so @@ -690,12 +695,14 @@ static ssize_t unexport_store(struct class *class, if (status < 0) goto done; - status = -EINVAL; - desc = gpio_to_desc(gpio); /* reject bogus commands (gpio_unexport ignores them) */ - if (!desc) - goto done; + if (!desc) { + pr_warn("%s: invalid GPIO %ld\n", __func__, gpio); + return -EINVAL; + } + + status = -EINVAL; /* No extra locking here; FLAG_SYSFS just signifies that the * request and export were done by on behalf of userspace, so @@ -846,8 +853,10 @@ static int gpiod_export_link(struct device *dev, const char *name, { int status = -EINVAL; - if (!desc) - goto done; + if (!desc) { + pr_warn("%s: invalid GPIO\n", __func__); + return -EINVAL; + } mutex_lock(&sysfs_lock); @@ -865,7 +874,6 @@ static int gpiod_export_link(struct device *dev, const char *name, mutex_unlock(&sysfs_lock); -done: if (status) pr_debug("%s: gpio%d status %d\n", __func__, desc_to_gpio(desc), status); @@ -896,8 +904,10 @@ static int gpiod_sysfs_set_active_low(struct gpio_desc *desc, int value) struct device *dev = NULL; int status = -EINVAL; - if (!desc) - goto done; + if (!desc) { + pr_warn("%s: invalid GPIO\n", __func__); + return -EINVAL; + } mutex_lock(&sysfs_lock); @@ -914,7 +924,6 @@ static int gpiod_sysfs_set_active_low(struct gpio_desc *desc, int value) unlock: mutex_unlock(&sysfs_lock); -done: if (status) pr_debug("%s: gpio%d status %d\n", __func__, desc_to_gpio(desc), status); @@ -940,8 +949,8 @@ static void gpiod_unexport(struct gpio_desc *desc) struct device *dev = NULL; if (!desc) { - status = -EINVAL; - goto done; + pr_warn("%s: invalid GPIO\n", __func__); + return; } mutex_lock(&sysfs_lock); @@ -962,7 +971,7 @@ static void gpiod_unexport(struct gpio_desc *desc) device_unregister(dev); put_device(dev); } -done: + if (status) pr_debug("%s: gpio%d status %d\n", __func__, desc_to_gpio(desc), status); @@ -1384,12 +1393,13 @@ static int gpiod_request(struct gpio_desc *desc, const char *label) int status = -EPROBE_DEFER; unsigned long flags; + if (!desc) { + pr_warn("%s: invalid GPIO\n", __func__); + return -EINVAL; + } + spin_lock_irqsave(&gpio_lock, flags); - if (!desc) { - status = -EINVAL; - goto done; - } chip = desc->chip; if (chip == NULL) goto done; @@ -1432,8 +1442,7 @@ static int gpiod_request(struct gpio_desc *desc, const char *label) done: if (status) pr_debug("_gpio_request: gpio-%d (%s) status %d\n", - desc ? desc_to_gpio(desc) : -1, - label ? : "?", status); + desc_to_gpio(desc), label ? 
: "?", status); spin_unlock_irqrestore(&gpio_lock, flags); return status; } @@ -1616,10 +1625,13 @@ static int gpiod_direction_input(struct gpio_desc *desc) int status = -EINVAL; int offset; + if (!desc) { + pr_warn("%s: invalid GPIO\n", __func__); + return -EINVAL; + } + spin_lock_irqsave(&gpio_lock, flags); - if (!desc) - goto fail; chip = desc->chip; if (!chip || !chip->get || !chip->direction_input) goto fail; @@ -1655,13 +1667,9 @@ lose: return status; fail: spin_unlock_irqrestore(&gpio_lock, flags); - if (status) { - int gpio = -1; - if (desc) - gpio = desc_to_gpio(desc); - pr_debug("%s: gpio-%d status %d\n", - __func__, gpio, status); - } + if (status) + pr_debug("%s: gpio-%d status %d\n", __func__, + desc_to_gpio(desc), status); return status; } @@ -1678,6 +1686,11 @@ static int gpiod_direction_output(struct gpio_desc *desc, int value) int status = -EINVAL; int offset; + if (!desc) { + pr_warn("%s: invalid GPIO\n", __func__); + return -EINVAL; + } + /* Open drain pin should not be driven to 1 */ if (value && test_bit(FLAG_OPEN_DRAIN, &desc->flags)) return gpiod_direction_input(desc); @@ -1688,8 +1701,6 @@ static int gpiod_direction_output(struct gpio_desc *desc, int value) spin_lock_irqsave(&gpio_lock, flags); - if (!desc) - goto fail; chip = desc->chip; if (!chip || !chip->set || !chip->direction_output) goto fail; @@ -1725,13 +1736,9 @@ lose: return status; fail: spin_unlock_irqrestore(&gpio_lock, flags); - if (status) { - int gpio = -1; - if (desc) - gpio = desc_to_gpio(desc); - pr_debug("%s: gpio-%d status %d\n", - __func__, gpio, status); - } + if (status) + pr_debug("%s: gpio-%d status %d\n", __func__, + desc_to_gpio(desc), status); return status; } @@ -1753,10 +1760,13 @@ static int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce) int status = -EINVAL; int offset; + if (!desc) { + pr_warn("%s: invalid GPIO\n", __func__); + return -EINVAL; + } + spin_lock_irqsave(&gpio_lock, flags); - if (!desc) - goto fail; chip = desc->chip; if (!chip || !chip->set || !chip->set_debounce) goto fail; @@ -1776,13 +1786,9 @@ static int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce) fail: spin_unlock_irqrestore(&gpio_lock, flags); - if (status) { - int gpio = -1; - if (desc) - gpio = desc_to_gpio(desc); - pr_debug("%s: gpio-%d status %d\n", - __func__, gpio, status); - } + if (status) + pr_debug("%s: gpio-%d status %d\n", __func__, + desc_to_gpio(desc), status); return status; } @@ -1830,6 +1836,8 @@ static int gpiod_get_value(struct gpio_desc *desc) int value; int offset; + if (!desc) + return 0; chip = desc->chip; offset = gpio_chip_hwgpio(desc); /* Should be using gpio_get_value_cansleep() */ @@ -1912,6 +1920,8 @@ static void gpiod_set_value(struct gpio_desc *desc, int value) { struct gpio_chip *chip; + if (!desc) + return; chip = desc->chip; /* Should be using gpio_set_value_cansleep() */ WARN_ON(chip->can_sleep); @@ -1940,6 +1950,8 @@ EXPORT_SYMBOL_GPL(__gpio_set_value); */ static int gpiod_cansleep(struct gpio_desc *desc) { + if (!desc) + return 0; /* only call this on GPIOs that are valid! */ return desc->chip->can_sleep; } @@ -1964,6 +1976,8 @@ static int gpiod_to_irq(struct gpio_desc *desc) struct gpio_chip *chip; int offset; + if (!desc) + return -EINVAL; chip = desc->chip; offset = gpio_chip_hwgpio(desc); return chip->to_irq ? 
chip->to_irq(chip, offset) : -ENXIO; @@ -1987,6 +2001,8 @@ static int gpiod_get_value_cansleep(struct gpio_desc *desc) int offset; might_sleep_if(extra_checks); + if (!desc) + return 0; chip = desc->chip; offset = gpio_chip_hwgpio(desc); value = chip->get ? chip->get(chip, offset) : 0; @@ -2005,6 +2021,8 @@ static void gpiod_set_value_cansleep(struct gpio_desc *desc, int value) struct gpio_chip *chip; might_sleep_if(extra_checks); + if (!desc) + return; chip = desc->chip; trace_gpio_value(desc_to_gpio(desc), 0, value); if (test_bit(FLAG_OPEN_DRAIN, &desc->flags)) From def634338d3ffb32fbe9b0a2d70cc24ef909cd4f Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 15 Feb 2013 14:46:15 +0900 Subject: [PATCH 087/261] gpiolib: use const parameters when possible Constify descriptor parameter of gpiod_* functions for those that should obviously not modify it. This includes value or direction get, cansleep, and IRQ number query. Signed-off-by: Alexandre Courbot Signed-off-by: Grant Likely --- drivers/gpio/gpiolib.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 1a8a7a8f803f..a33bfc23e9f5 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -88,13 +88,14 @@ static int gpiod_request(struct gpio_desc *desc, const char *label); static void gpiod_free(struct gpio_desc *desc); static int gpiod_direction_input(struct gpio_desc *desc); static int gpiod_direction_output(struct gpio_desc *desc, int value); +static int gpiod_get_direction(const struct gpio_desc *desc); static int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce); -static int gpiod_get_value_cansleep(struct gpio_desc *desc); +static int gpiod_get_value_cansleep(const struct gpio_desc *desc); static void gpiod_set_value_cansleep(struct gpio_desc *desc, int value); -static int gpiod_get_value(struct gpio_desc *desc); +static int gpiod_get_value(const struct gpio_desc *desc); static void gpiod_set_value(struct gpio_desc *desc, int value); -static int gpiod_cansleep(struct gpio_desc *desc); -static int gpiod_to_irq(struct gpio_desc *desc); +static int gpiod_cansleep(const struct gpio_desc *desc); +static int gpiod_to_irq(const struct gpio_desc *desc); static int gpiod_export(struct gpio_desc *desc, bool direction_may_change); static int gpiod_export_link(struct device *dev, const char *name, struct gpio_desc *desc); @@ -172,7 +173,7 @@ static int gpio_ensure_requested(struct gpio_desc *desc) } /* caller holds gpio_lock *OR* gpio is marked as requested */ -static struct gpio_chip *gpiod_to_chip(struct gpio_desc *desc) +static struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc) { return desc ? 
desc->chip : NULL; } @@ -207,7 +208,7 @@ static int gpiochip_find_base(int ngpio) } /* caller ensures gpio is valid and requested, chip->get_direction may sleep */ -static int gpiod_get_direction(struct gpio_desc *desc) +static int gpiod_get_direction(const struct gpio_desc *desc) { struct gpio_chip *chip; unsigned offset; @@ -223,11 +224,13 @@ static int gpiod_get_direction(struct gpio_desc *desc) if (status > 0) { /* GPIOF_DIR_IN, or other positive */ status = 1; - clear_bit(FLAG_IS_OUT, &desc->flags); + /* FLAG_IS_OUT is just a cache of the result of get_direction(), + * so it does not affect constness per se */ + clear_bit(FLAG_IS_OUT, &((struct gpio_desc *)desc)->flags); } if (status == 0) { /* GPIOF_DIR_OUT */ - set_bit(FLAG_IS_OUT, &desc->flags); + set_bit(FLAG_IS_OUT, &((struct gpio_desc *)desc)->flags); } return status; } @@ -263,7 +266,7 @@ static DEFINE_MUTEX(sysfs_lock); static ssize_t gpio_direction_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct gpio_desc *desc = dev_get_drvdata(dev); + const struct gpio_desc *desc = dev_get_drvdata(dev); ssize_t status; mutex_lock(&sysfs_lock); @@ -1830,7 +1833,7 @@ EXPORT_SYMBOL_GPL(gpio_set_debounce); * It returns the zero or nonzero value provided by the associated * gpio_chip.get() method; or zero if no such method is provided. */ -static int gpiod_get_value(struct gpio_desc *desc) +static int gpiod_get_value(const struct gpio_desc *desc) { struct gpio_chip *chip; int value; @@ -1948,7 +1951,7 @@ EXPORT_SYMBOL_GPL(__gpio_set_value); * This is used directly or indirectly to implement gpio_cansleep(). It * returns nonzero if access reading or writing the GPIO value can sleep. */ -static int gpiod_cansleep(struct gpio_desc *desc) +static int gpiod_cansleep(const struct gpio_desc *desc) { if (!desc) return 0; @@ -1971,7 +1974,7 @@ EXPORT_SYMBOL_GPL(__gpio_cansleep); * It returns the number of the IRQ signaled by this (input) GPIO, * or a negative errno. */ -static int gpiod_to_irq(struct gpio_desc *desc) +static int gpiod_to_irq(const struct gpio_desc *desc) { struct gpio_chip *chip; int offset; @@ -1994,7 +1997,7 @@ EXPORT_SYMBOL_GPL(__gpio_to_irq); * Common examples include ones connected to I2C or SPI chips. */ -static int gpiod_get_value_cansleep(struct gpio_desc *desc) +static int gpiod_get_value_cansleep(const struct gpio_desc *desc) { struct gpio_chip *chip; int value; From 24d7628fe8b10bb3770a11ddf71719613832a298 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 15 Feb 2013 14:46:16 +0900 Subject: [PATCH 088/261] gpiolib: move comment to right function This comment applies to gpio_to_chip(), not gpiod_to_chip(). Signed-off-by: Alexandre Courbot Signed-off-by: Grant Likely --- drivers/gpio/gpiolib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index a33bfc23e9f5..c2534d62911c 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -172,12 +172,12 @@ static int gpio_ensure_requested(struct gpio_desc *desc) return 0; } -/* caller holds gpio_lock *OR* gpio is marked as requested */ static struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc) { return desc ? 
desc->chip : NULL; } +/* caller holds gpio_lock *OR* gpio is marked as requested */ struct gpio_chip *gpio_to_chip(unsigned gpio) { return gpiod_to_chip(gpio_to_desc(gpio)); From e97f9b5277afeabb54892ebc6f68500098467ba1 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 27 Feb 2013 17:25:15 +0200 Subject: [PATCH 089/261] gpio/gpio-ich: make ichx_gpio_check_available() return what callers expect ichx_gpio_check_available() returns either 0 or -ENXIO depending on whether the given GPIO is available or not. However, callers of this function treat the return value as boolean: ... if (!ichx_gpio_check_available(gpio, nr)) return -ENXIO; which erroneously fails when the GPIO is available and not vice versa. Fix this by making the function return boolean as expected by the callers. Signed-off-by: Mika Westerberg Signed-off-by: Grant Likely --- drivers/gpio/gpio-ich.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-ich.c b/drivers/gpio/gpio-ich.c index 6f2306db8591..f9dbd503fc40 100644 --- a/drivers/gpio/gpio-ich.c +++ b/drivers/gpio/gpio-ich.c @@ -128,9 +128,9 @@ static int ichx_read_bit(int reg, unsigned nr) return data & (1 << bit) ? 1 : 0; } -static int ichx_gpio_check_available(struct gpio_chip *gpio, unsigned nr) +static bool ichx_gpio_check_available(struct gpio_chip *gpio, unsigned nr) { - return (ichx_priv.use_gpio & (1 << (nr / 32))) ? 0 : -ENXIO; + return ichx_priv.use_gpio & (1 << (nr / 32)); } static int ichx_gpio_direction_input(struct gpio_chip *gpio, unsigned nr) From e2ca90c276e1fc410d7cd3c1a4eee245ec902a20 Mon Sep 17 00:00:00 2001 From: Freddy Xin Date: Sat, 2 Mar 2013 00:41:11 +0000 Subject: [PATCH 090/261] ax88179_178a: ASIX AX88179_178A USB 3.0/2.0 to gigabit ethernet adapter driver This is a resubmission. Added kfree() in ax88179_get_eeprom to prevent memory leakage. Modified "__le16 rxctl" to "u16 rxctl" in "struct ax88179_data" and removed pointless casts. Removed asix_init and asix_exit functions and added "module_usb_driver(ax88179_178a_driver)". Fixed endianness issue on big endian systems and verified this driver on iBook G4. Removed steps that change net->features in ax88179_set_features function. Added "const" to ethtool_ops structure and fixed the coding style of AX88179_BULKIN_SIZE array. Fixed the issue that the default MTU is not 1500. Added ax88179_change_mtu function and enabled the hardware jumbo frame function to support an MTU higher than 1500. Fixed indentation and empty line coding style errors. The _nopm version usb functions were added to access registers in suspend and resume functions. Several variables allocated dynamically were removed and replaced by stack variables. ax88179_get_eeprom was modified from asix_get_eeprom in asix_common. This patch adds a driver for ASIX's AX88179 family of USB 3.0/2.0 to gigabit ethernet adapters. It's based on the AX88xxx driver but the usb commands used to access registers for AX88179 are completely different. This driver had been verified on x86 system with AX88179/AX88178A and Sitecom LN-032 USB dongles. Signed-off-by: Freddy Xin Signed-off-by: David S.
Miller --- drivers/net/usb/Kconfig | 18 + drivers/net/usb/Makefile | 1 + drivers/net/usb/ax88179_178a.c | 1448 ++++++++++++++++++++++++++++++++ 3 files changed, 1467 insertions(+) create mode 100644 drivers/net/usb/ax88179_178a.c diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index da92ed3797aa..3b6e9b83342d 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -156,6 +156,24 @@ config USB_NET_AX8817X This driver creates an interface named "ethX", where X depends on what other networking devices you have in use. +config USB_NET_AX88179_178A + tristate "ASIX AX88179/178A USB 3.0/2.0 to Gigabit Ethernet" + depends on USB_USBNET + select CRC32 + select PHYLIB + default y + help + This option adds support for ASIX AX88179 based USB 3.0/2.0 + to Gigabit Ethernet adapters. + + This driver should work with at least the following devices: + * ASIX AX88179 + * ASIX AX88178A + * Sitcomm LN-032 + + This driver creates an interface named "ethX", where X depends on + what other networking devices you have in use. + config USB_NET_CDCETHER tristate "CDC Ethernet support (smart devices such as cable modems)" depends on USB_USBNET diff --git a/drivers/net/usb/Makefile b/drivers/net/usb/Makefile index 478691326f37..119b06c9aa16 100644 --- a/drivers/net/usb/Makefile +++ b/drivers/net/usb/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_USB_RTL8150) += rtl8150.o obj-$(CONFIG_USB_HSO) += hso.o obj-$(CONFIG_USB_NET_AX8817X) += asix.o asix-y := asix_devices.o asix_common.o ax88172a.o +obj-$(CONFIG_USB_NET_AX88179_178A) += ax88179_178a.o obj-$(CONFIG_USB_NET_CDCETHER) += cdc_ether.o obj-$(CONFIG_USB_NET_CDC_EEM) += cdc_eem.o obj-$(CONFIG_USB_NET_DM9601) += dm9601.o diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c new file mode 100644 index 000000000000..71c27d8d214f --- /dev/null +++ b/drivers/net/usb/ax88179_178a.c @@ -0,0 +1,1448 @@ +/* + * ASIX AX88179/178A USB 3.0/2.0 to Gigabit Ethernet Devices + * + * Copyright (C) 2011-2013 ASIX + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#include +#include +#include +#include +#include +#include + +#define AX88179_PHY_ID 0x03 +#define AX_EEPROM_LEN 0x100 +#define AX88179_EEPROM_MAGIC 0x17900b95 +#define AX_MCAST_FLTSIZE 8 +#define AX_MAX_MCAST 64 +#define AX_INT_PPLS_LINK ((u32)BIT(16)) +#define AX_RXHDR_L4_TYPE_MASK 0x1c +#define AX_RXHDR_L4_TYPE_UDP 4 +#define AX_RXHDR_L4_TYPE_TCP 16 +#define AX_RXHDR_L3CSUM_ERR 2 +#define AX_RXHDR_L4CSUM_ERR 1 +#define AX_RXHDR_CRC_ERR ((u32)BIT(31)) +#define AX_RXHDR_DROP_ERR ((u32)BIT(30)) +#define AX_ACCESS_MAC 0x01 +#define AX_ACCESS_PHY 0x02 +#define AX_ACCESS_EEPROM 0x04 +#define AX_ACCESS_EFUS 0x05 +#define AX_PAUSE_WATERLVL_HIGH 0x54 +#define AX_PAUSE_WATERLVL_LOW 0x55 + +#define PHYSICAL_LINK_STATUS 0x02 + #define AX_USB_SS 0x04 + #define AX_USB_HS 0x02 + +#define GENERAL_STATUS 0x03 +/* Check AX88179 version. UA1:Bit2 = 0, UA2:Bit2 = 1 */ + #define AX_SECLD 0x04 + +#define AX_SROM_ADDR 0x07 +#define AX_SROM_CMD 0x0a + #define EEP_RD 0x04 + #define EEP_BUSY 0x10 + +#define AX_SROM_DATA_LOW 0x08 +#define AX_SROM_DATA_HIGH 0x09 + +#define AX_RX_CTL 0x0b + #define AX_RX_CTL_DROPCRCERR 0x0100 + #define AX_RX_CTL_IPE 0x0200 + #define AX_RX_CTL_START 0x0080 + #define AX_RX_CTL_AP 0x0020 + #define AX_RX_CTL_AM 0x0010 + #define AX_RX_CTL_AB 0x0008 + #define AX_RX_CTL_AMALL 0x0002 + #define AX_RX_CTL_PRO 0x0001 + #define AX_RX_CTL_STOP 0x0000 + +#define AX_NODE_ID 0x10 +#define AX_MULFLTARY 0x16 + +#define AX_MEDIUM_STATUS_MODE 0x22 + #define AX_MEDIUM_GIGAMODE 0x01 + #define AX_MEDIUM_FULL_DUPLEX 0x02 + #define AX_MEDIUM_ALWAYS_ONE 0x04 + #define AX_MEDIUM_EN_125MHZ 0x08 + #define AX_MEDIUM_RXFLOW_CTRLEN 0x10 + #define AX_MEDIUM_TXFLOW_CTRLEN 0x20 + #define AX_MEDIUM_RECEIVE_EN 0x100 + #define AX_MEDIUM_PS 0x200 + #define AX_MEDIUM_JUMBO_EN 0x8040 + +#define AX_MONITOR_MOD 0x24 + #define AX_MONITOR_MODE_RWLC 0x02 + #define AX_MONITOR_MODE_RWMP 0x04 + #define AX_MONITOR_MODE_PMEPOL 0x20 + #define AX_MONITOR_MODE_PMETYPE 0x40 + +#define AX_GPIO_CTRL 0x25 + #define AX_GPIO_CTRL_GPIO3EN 0x80 + #define AX_GPIO_CTRL_GPIO2EN 0x40 + #define AX_GPIO_CTRL_GPIO1EN 0x20 + +#define AX_PHYPWR_RSTCTL 0x26 + #define AX_PHYPWR_RSTCTL_BZ 0x0010 + #define AX_PHYPWR_RSTCTL_IPRL 0x0020 + #define AX_PHYPWR_RSTCTL_AT 0x1000 + +#define AX_RX_BULKIN_QCTRL 0x2e +#define AX_CLK_SELECT 0x33 + #define AX_CLK_SELECT_BCS 0x01 + #define AX_CLK_SELECT_ACS 0x02 + #define AX_CLK_SELECT_ULR 0x08 + +#define AX_RXCOE_CTL 0x34 + #define AX_RXCOE_IP 0x01 + #define AX_RXCOE_TCP 0x02 + #define AX_RXCOE_UDP 0x04 + #define AX_RXCOE_TCPV6 0x20 + #define AX_RXCOE_UDPV6 0x40 + +#define AX_TXCOE_CTL 0x35 + #define AX_TXCOE_IP 0x01 + #define AX_TXCOE_TCP 0x02 + #define AX_TXCOE_UDP 0x04 + #define AX_TXCOE_TCPV6 0x20 + #define AX_TXCOE_UDPV6 0x40 + +#define AX_LEDCTRL 0x73 + +#define GMII_PHY_PHYSR 0x11 + #define GMII_PHY_PHYSR_SMASK 0xc000 + #define GMII_PHY_PHYSR_GIGA 0x8000 + #define GMII_PHY_PHYSR_100 0x4000 + #define GMII_PHY_PHYSR_FULL 0x2000 + #define GMII_PHY_PHYSR_LINK 0x400 + +#define GMII_LED_ACT 0x1a + #define GMII_LED_ACTIVE_MASK 0xff8f + #define GMII_LED0_ACTIVE BIT(4) + #define GMII_LED1_ACTIVE BIT(5) + #define GMII_LED2_ACTIVE BIT(6) + +#define GMII_LED_LINK 0x1c + #define GMII_LED_LINK_MASK 0xf888 + #define GMII_LED0_LINK_10 BIT(0) + #define GMII_LED0_LINK_100 BIT(1) + #define GMII_LED0_LINK_1000 BIT(2) + #define GMII_LED1_LINK_10 BIT(4) + #define GMII_LED1_LINK_100 BIT(5) + #define GMII_LED1_LINK_1000 BIT(6) + #define GMII_LED2_LINK_10 BIT(8) + #define GMII_LED2_LINK_100 BIT(9) + #define GMII_LED2_LINK_1000 
BIT(10) + #define LED0_ACTIVE BIT(0) + #define LED0_LINK_10 BIT(1) + #define LED0_LINK_100 BIT(2) + #define LED0_LINK_1000 BIT(3) + #define LED0_FD BIT(4) + #define LED0_USB3_MASK 0x001f + #define LED1_ACTIVE BIT(5) + #define LED1_LINK_10 BIT(6) + #define LED1_LINK_100 BIT(7) + #define LED1_LINK_1000 BIT(8) + #define LED1_FD BIT(9) + #define LED1_USB3_MASK 0x03e0 + #define LED2_ACTIVE BIT(10) + #define LED2_LINK_1000 BIT(13) + #define LED2_LINK_100 BIT(12) + #define LED2_LINK_10 BIT(11) + #define LED2_FD BIT(14) + #define LED_VALID BIT(15) + #define LED2_USB3_MASK 0x7c00 + +#define GMII_PHYPAGE 0x1e +#define GMII_PHY_PAGE_SELECT 0x1f + #define GMII_PHY_PGSEL_EXT 0x0007 + #define GMII_PHY_PGSEL_PAGE0 0x0000 + +struct ax88179_data { + u16 rxctl; + u16 reserved; +}; + +struct ax88179_int_data { + __le32 intdata1; + __le32 intdata2; +}; + +static const struct { + unsigned char ctrl, timer_l, timer_h, size, ifg; +} AX88179_BULKIN_SIZE[] = { + {7, 0x4f, 0, 0x12, 0xff}, + {7, 0x20, 3, 0x16, 0xff}, + {7, 0xae, 7, 0x18, 0xff}, + {7, 0xcc, 0x4c, 0x18, 8}, +}; + +static int __ax88179_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, + u16 size, void *data, int in_pm) +{ + int ret; + int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16); + + BUG_ON(!dev); + + if (!in_pm) + fn = usbnet_read_cmd; + else + fn = usbnet_read_cmd_nopm; + + ret = fn(dev, cmd, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + value, index, data, size); + + if (unlikely(ret < 0)) + netdev_warn(dev->net, "Failed to read reg index 0x%04x: %d\n", + index, ret); + + return ret; +} + +static int __ax88179_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, + u16 size, void *data, int in_pm) +{ + int ret; + int (*fn)(struct usbnet *, u8, u8, u16, u16, const void *, u16); + + BUG_ON(!dev); + + if (!in_pm) + fn = usbnet_write_cmd; + else + fn = usbnet_write_cmd_nopm; + + ret = fn(dev, cmd, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + value, index, data, size); + + if (unlikely(ret < 0)) + netdev_warn(dev->net, "Failed to write reg index 0x%04x: %d\n", + index, ret); + + return ret; +} + +static void ax88179_write_cmd_async(struct usbnet *dev, u8 cmd, u16 value, + u16 index, u16 size, void *data) +{ + u16 buf; + + if (2 == size) { + buf = *((u16 *)data); + cpu_to_le16s(&buf); + usbnet_write_cmd_async(dev, cmd, USB_DIR_OUT | USB_TYPE_VENDOR | + USB_RECIP_DEVICE, value, index, &buf, + size); + } else { + usbnet_write_cmd_async(dev, cmd, USB_DIR_OUT | USB_TYPE_VENDOR | + USB_RECIP_DEVICE, value, index, data, + size); + } +} + +static int ax88179_read_cmd_nopm(struct usbnet *dev, u8 cmd, u16 value, + u16 index, u16 size, void *data) +{ + int ret; + + if (2 == size) { + u16 buf; + ret = __ax88179_read_cmd(dev, cmd, value, index, size, &buf, 1); + le16_to_cpus(&buf); + *((u16 *)data) = buf; + } else if (4 == size) { + u32 buf; + ret = __ax88179_read_cmd(dev, cmd, value, index, size, &buf, 1); + le32_to_cpus(&buf); + *((u32 *)data) = buf; + } else { + ret = __ax88179_read_cmd(dev, cmd, value, index, size, data, 1); + } + + return ret; +} + +static int ax88179_write_cmd_nopm(struct usbnet *dev, u8 cmd, u16 value, + u16 index, u16 size, void *data) +{ + int ret; + + if (2 == size) { + u16 buf; + buf = *((u16 *)data); + cpu_to_le16s(&buf); + ret = __ax88179_write_cmd(dev, cmd, value, index, + size, &buf, 1); + } else { + ret = __ax88179_write_cmd(dev, cmd, value, index, + size, data, 1); + } + + return ret; +} + +static int ax88179_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, + u16 size, void *data) +{ 
+ int ret; + + if (2 == size) { + u16 buf; + ret = __ax88179_read_cmd(dev, cmd, value, index, size, &buf, 0); + le16_to_cpus(&buf); + *((u16 *)data) = buf; + } else if (4 == size) { + u32 buf; + ret = __ax88179_read_cmd(dev, cmd, value, index, size, &buf, 0); + le32_to_cpus(&buf); + *((u32 *)data) = buf; + } else { + ret = __ax88179_read_cmd(dev, cmd, value, index, size, data, 0); + } + + return ret; +} + +static int ax88179_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, + u16 size, void *data) +{ + int ret; + + if (2 == size) { + u16 buf; + buf = *((u16 *)data); + cpu_to_le16s(&buf); + ret = __ax88179_write_cmd(dev, cmd, value, index, + size, &buf, 0); + } else { + ret = __ax88179_write_cmd(dev, cmd, value, index, + size, data, 0); + } + + return ret; +} + +static void ax88179_status(struct usbnet *dev, struct urb *urb) +{ + struct ax88179_int_data *event; + u32 link; + + if (urb->actual_length < 8) + return; + + event = urb->transfer_buffer; + le32_to_cpus((void *)&event->intdata1); + + link = (((__force u32)event->intdata1) & AX_INT_PPLS_LINK) >> 16; + + if (netif_carrier_ok(dev->net) != link) { + if (link) + usbnet_defer_kevent(dev, EVENT_LINK_RESET); + else + netif_carrier_off(dev->net); + + netdev_info(dev->net, "ax88179 - Link status is: %d\n", link); + } +} + +static int ax88179_mdio_read(struct net_device *netdev, int phy_id, int loc) +{ + struct usbnet *dev = netdev_priv(netdev); + u16 res; + + ax88179_read_cmd(dev, AX_ACCESS_PHY, phy_id, (__u16)loc, 2, &res); + return res; +} + +static void ax88179_mdio_write(struct net_device *netdev, int phy_id, int loc, + int val) +{ + struct usbnet *dev = netdev_priv(netdev); + u16 res = (u16) val; + + ax88179_write_cmd(dev, AX_ACCESS_PHY, phy_id, (__u16)loc, 2, &res); +} + +static int ax88179_suspend(struct usb_interface *intf, pm_message_t message) +{ + struct usbnet *dev = usb_get_intfdata(intf); + u16 tmp16; + u8 tmp8; + + usbnet_suspend(intf, message); + + /* Disable RX path */ + ax88179_read_cmd_nopm(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE, + 2, 2, &tmp16); + tmp16 &= ~AX_MEDIUM_RECEIVE_EN; + ax88179_write_cmd_nopm(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE, + 2, 2, &tmp16); + + /* Force bulk-in zero length */ + ax88179_read_cmd_nopm(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, + 2, 2, &tmp16); + + tmp16 |= AX_PHYPWR_RSTCTL_BZ | AX_PHYPWR_RSTCTL_IPRL; + ax88179_write_cmd_nopm(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, + 2, 2, &tmp16); + + /* change clock */ + tmp8 = 0; + ax88179_write_cmd_nopm(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, &tmp8); + + /* Configure RX control register => stop operation */ + tmp16 = AX_RX_CTL_STOP; + ax88179_write_cmd_nopm(dev, AX_ACCESS_MAC, AX_RX_CTL, 2, 2, &tmp16); + + return 0; +} + +/* This function is used to enable the autodetach function. 
*/ +/* This function is determined by offset 0x43 of EEPROM */ +static int ax88179_auto_detach(struct usbnet *dev, int in_pm) +{ + u16 tmp16; + u8 tmp8; + int (*fnr)(struct usbnet *, u8, u16, u16, u16, void *); + int (*fnw)(struct usbnet *, u8, u16, u16, u16, void *); + + if (!in_pm) { + fnr = ax88179_read_cmd; + fnw = ax88179_write_cmd; + } else { + fnr = ax88179_read_cmd_nopm; + fnw = ax88179_write_cmd_nopm; + } + + if (fnr(dev, AX_ACCESS_EEPROM, 0x43, 1, 2, &tmp16) < 0) + return 0; + + if ((tmp16 == 0xFFFF) || (!(tmp16 & 0x0100))) + return 0; + + /* Enable Auto Detach bit */ + tmp8 = 0; + fnr(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, &tmp8); + tmp8 |= AX_CLK_SELECT_ULR; + fnw(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, &tmp8); + + fnr(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, 2, 2, &tmp16); + tmp16 |= AX_PHYPWR_RSTCTL_AT; + fnw(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, 2, 2, &tmp16); + + return 0; +} + +static int ax88179_resume(struct usb_interface *intf) +{ + struct usbnet *dev = usb_get_intfdata(intf); + u16 tmp16; + u8 tmp8; + + netif_carrier_off(dev->net); + + /* Power up ethernet PHY */ + tmp16 = 0; + ax88179_write_cmd_nopm(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, + 2, 2, &tmp16); + udelay(1000); + + tmp16 = AX_PHYPWR_RSTCTL_IPRL; + ax88179_write_cmd_nopm(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, + 2, 2, &tmp16); + msleep(200); + + /* Ethernet PHY Auto Detach*/ + ax88179_auto_detach(dev, 1); + + /* Enable clock */ + ax88179_read_cmd_nopm(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, &tmp8); + tmp8 |= AX_CLK_SELECT_ACS | AX_CLK_SELECT_BCS; + ax88179_write_cmd_nopm(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, &tmp8); + msleep(100); + + /* Configure RX control register => start operation */ + tmp16 = AX_RX_CTL_DROPCRCERR | AX_RX_CTL_IPE | AX_RX_CTL_START | + AX_RX_CTL_AP | AX_RX_CTL_AMALL | AX_RX_CTL_AB; + ax88179_write_cmd_nopm(dev, AX_ACCESS_MAC, AX_RX_CTL, 2, 2, &tmp16); + + return usbnet_resume(intf); +} + +static void +ax88179_get_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) +{ + struct usbnet *dev = netdev_priv(net); + u8 opt; + + if (ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_MONITOR_MOD, + 1, 1, &opt) < 0) { + wolinfo->supported = 0; + wolinfo->wolopts = 0; + return; + } + + wolinfo->supported = WAKE_PHY | WAKE_MAGIC; + wolinfo->wolopts = 0; + if (opt & AX_MONITOR_MODE_RWLC) + wolinfo->wolopts |= WAKE_PHY; + if (opt & AX_MONITOR_MODE_RWMP) + wolinfo->wolopts |= WAKE_MAGIC; +} + +static int +ax88179_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) +{ + struct usbnet *dev = netdev_priv(net); + u8 opt = 0; + + if (wolinfo->wolopts & WAKE_PHY) + opt |= AX_MONITOR_MODE_RWLC; + if (wolinfo->wolopts & WAKE_MAGIC) + opt |= AX_MONITOR_MODE_RWMP; + + if (ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MONITOR_MOD, + 1, 1, &opt) < 0) + return -EINVAL; + + return 0; +} + +static int ax88179_get_eeprom_len(struct net_device *net) +{ + return AX_EEPROM_LEN; +} + +static int +ax88179_get_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom, + u8 *data) +{ + struct usbnet *dev = netdev_priv(net); + u16 *eeprom_buff; + int first_word, last_word; + int i, ret; + + if (eeprom->len == 0) + return -EINVAL; + + eeprom->magic = AX88179_EEPROM_MAGIC; + + first_word = eeprom->offset >> 1; + last_word = (eeprom->offset + eeprom->len - 1) >> 1; + eeprom_buff = kmalloc(sizeof(u16) * (last_word - first_word + 1), + GFP_KERNEL); + if (!eeprom_buff) + return -ENOMEM; + + /* ax88179/178A returns 2 bytes from eeprom on read */ + for (i = first_word; i <= last_word; i++) { + ret = __ax88179_read_cmd(dev, 
AX_ACCESS_EEPROM, i, 1, 2, + &eeprom_buff[i - first_word], + 0); + if (ret < 0) { + kfree(eeprom_buff); + return -EIO; + } + } + + memcpy(data, (u8 *)eeprom_buff + (eeprom->offset & 1), eeprom->len); + kfree(eeprom_buff); + return 0; +} + +static int ax88179_get_settings(struct net_device *net, struct ethtool_cmd *cmd) +{ + struct usbnet *dev = netdev_priv(net); + return mii_ethtool_gset(&dev->mii, cmd); +} + +static int ax88179_set_settings(struct net_device *net, struct ethtool_cmd *cmd) +{ + struct usbnet *dev = netdev_priv(net); + return mii_ethtool_sset(&dev->mii, cmd); +} + + +static int ax88179_ioctl(struct net_device *net, struct ifreq *rq, int cmd) +{ + struct usbnet *dev = netdev_priv(net); + return generic_mii_ioctl(&dev->mii, if_mii(rq), cmd, NULL); +} + +static const struct ethtool_ops ax88179_ethtool_ops = { + .get_link = ethtool_op_get_link, + .get_msglevel = usbnet_get_msglevel, + .set_msglevel = usbnet_set_msglevel, + .get_wol = ax88179_get_wol, + .set_wol = ax88179_set_wol, + .get_eeprom_len = ax88179_get_eeprom_len, + .get_eeprom = ax88179_get_eeprom, + .get_settings = ax88179_get_settings, + .set_settings = ax88179_set_settings, + .nway_reset = usbnet_nway_reset, +}; + +static void ax88179_set_multicast(struct net_device *net) +{ + struct usbnet *dev = netdev_priv(net); + struct ax88179_data *data = (struct ax88179_data *)dev->data; + u8 *m_filter = ((u8 *)dev->data) + 12; + + data->rxctl = (AX_RX_CTL_START | AX_RX_CTL_AB | AX_RX_CTL_IPE); + + if (net->flags & IFF_PROMISC) { + data->rxctl |= AX_RX_CTL_PRO; + } else if (net->flags & IFF_ALLMULTI || + netdev_mc_count(net) > AX_MAX_MCAST) { + data->rxctl |= AX_RX_CTL_AMALL; + } else if (netdev_mc_empty(net)) { + /* just broadcast and directed */ + } else { + /* We use the 20 byte dev->data for our 8 byte filter buffer + * to avoid allocating memory that is tricky to free later + */ + u32 crc_bits; + struct netdev_hw_addr *ha; + + memset(m_filter, 0, AX_MCAST_FLTSIZE); + + netdev_for_each_mc_addr(ha, net) { + crc_bits = ether_crc(ETH_ALEN, ha->addr) >> 26; + *(m_filter + (crc_bits >> 3)) |= (1 << (crc_bits & 7)); + } + + ax88179_write_cmd_async(dev, AX_ACCESS_MAC, AX_MULFLTARY, + AX_MCAST_FLTSIZE, AX_MCAST_FLTSIZE, + m_filter); + + data->rxctl |= AX_RX_CTL_AM; + } + + ax88179_write_cmd_async(dev, AX_ACCESS_MAC, AX_RX_CTL, + 2, 2, &data->rxctl); +} + +static int +ax88179_set_features(struct net_device *net, netdev_features_t features) +{ + u8 tmp; + struct usbnet *dev = netdev_priv(net); + netdev_features_t changed = net->features ^ features; + + if (changed & NETIF_F_IP_CSUM) { + ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_TXCOE_CTL, 1, 1, &tmp); + tmp ^= AX_TXCOE_TCP | AX_TXCOE_UDP; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_TXCOE_CTL, 1, 1, &tmp); + } + + if (changed & NETIF_F_IPV6_CSUM) { + ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_TXCOE_CTL, 1, 1, &tmp); + tmp ^= AX_TXCOE_TCPV6 | AX_TXCOE_UDPV6; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_TXCOE_CTL, 1, 1, &tmp); + } + + if (changed & NETIF_F_RXCSUM) { + ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_RXCOE_CTL, 1, 1, &tmp); + tmp ^= AX_RXCOE_IP | AX_RXCOE_TCP | AX_RXCOE_UDP | + AX_RXCOE_TCPV6 | AX_RXCOE_UDPV6; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RXCOE_CTL, 1, 1, &tmp); + } + + return 0; +} + +static int ax88179_change_mtu(struct net_device *net, int new_mtu) +{ + struct usbnet *dev = netdev_priv(net); + u16 tmp16; + + if (new_mtu <= 0 || new_mtu > 4088) + return -EINVAL; + + net->mtu = new_mtu; + dev->hard_mtu = net->mtu + net->hard_header_len; + + if (net->mtu > 1500) { + 
ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE, + 2, 2, &tmp16); + tmp16 |= AX_MEDIUM_JUMBO_EN; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE, + 2, 2, &tmp16); + } else { + ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE, + 2, 2, &tmp16); + tmp16 &= ~AX_MEDIUM_JUMBO_EN; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE, + 2, 2, &tmp16); + } + + return 0; +} + +static int ax88179_set_mac_addr(struct net_device *net, void *p) +{ + struct usbnet *dev = netdev_priv(net); + struct sockaddr *addr = p; + + if (netif_running(net)) + return -EBUSY; + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + memcpy(net->dev_addr, addr->sa_data, ETH_ALEN); + + /* Set the MAC address */ + return ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN, + ETH_ALEN, net->dev_addr); +} + +static const struct net_device_ops ax88179_netdev_ops = { + .ndo_open = usbnet_open, + .ndo_stop = usbnet_stop, + .ndo_start_xmit = usbnet_start_xmit, + .ndo_tx_timeout = usbnet_tx_timeout, + .ndo_change_mtu = ax88179_change_mtu, + .ndo_set_mac_address = ax88179_set_mac_addr, + .ndo_validate_addr = eth_validate_addr, + .ndo_do_ioctl = ax88179_ioctl, + .ndo_set_rx_mode = ax88179_set_multicast, + .ndo_set_features = ax88179_set_features, +}; + +static int ax88179_check_eeprom(struct usbnet *dev) +{ + u8 i, buf, eeprom[20]; + u16 csum, delay = HZ / 10; + unsigned long jtimeout; + + /* Read EEPROM content */ + for (i = 0; i < 6; i++) { + buf = i; + if (ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_SROM_ADDR, + 1, 1, &buf) < 0) + return -EINVAL; + + buf = EEP_RD; + if (ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_SROM_CMD, + 1, 1, &buf) < 0) + return -EINVAL; + + jtimeout = jiffies + delay; + do { + ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_SROM_CMD, + 1, 1, &buf); + + if (time_after(jiffies, jtimeout)) + return -EINVAL; + + } while (buf & EEP_BUSY); + + __ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_SROM_DATA_LOW, + 2, 2, &eeprom[i * 2], 0); + + if ((i == 0) && (eeprom[0] == 0xFF)) + return -EINVAL; + } + + csum = eeprom[6] + eeprom[7] + eeprom[8] + eeprom[9]; + csum = (csum >> 8) + (csum & 0xff); + if ((csum + eeprom[10]) != 0xff) + return -EINVAL; + + return 0; +} + +static int ax88179_check_efuse(struct usbnet *dev, u16 *ledmode) +{ + u8 i; + u8 efuse[64]; + u16 csum = 0; + + if (ax88179_read_cmd(dev, AX_ACCESS_EFUS, 0, 64, 64, efuse) < 0) + return -EINVAL; + + if (*efuse == 0xFF) + return -EINVAL; + + for (i = 0; i < 64; i++) + csum = csum + efuse[i]; + + while (csum > 255) + csum = (csum & 0x00FF) + ((csum >> 8) & 0x00FF); + + if (csum != 0xFF) + return -EINVAL; + + *ledmode = (efuse[51] << 8) | efuse[52]; + + return 0; +} + +static int ax88179_convert_old_led(struct usbnet *dev, u16 *ledvalue) +{ + u16 led; + + /* Loaded the old eFuse LED Mode */ + if (ax88179_read_cmd(dev, AX_ACCESS_EEPROM, 0x3C, 1, 2, &led) < 0) + return -EINVAL; + + led >>= 8; + switch (led) { + case 0xFF: + led = LED0_ACTIVE | LED1_LINK_10 | LED1_LINK_100 | + LED1_LINK_1000 | LED2_ACTIVE | LED2_LINK_10 | + LED2_LINK_100 | LED2_LINK_1000 | LED_VALID; + break; + case 0xFE: + led = LED0_ACTIVE | LED1_LINK_1000 | LED2_LINK_100 | LED_VALID; + break; + case 0xFD: + led = LED0_ACTIVE | LED1_LINK_1000 | LED2_LINK_100 | + LED2_LINK_10 | LED_VALID; + break; + case 0xFC: + led = LED0_ACTIVE | LED1_ACTIVE | LED1_LINK_1000 | LED2_ACTIVE | + LED2_LINK_100 | LED2_LINK_10 | LED_VALID; + break; + default: + led = LED0_ACTIVE | LED1_LINK_10 | LED1_LINK_100 | + LED1_LINK_1000 | LED2_ACTIVE | LED2_LINK_10 | + 
LED2_LINK_100 | LED2_LINK_1000 | LED_VALID; + break; + } + + *ledvalue = led; + + return 0; +} + +static int ax88179_led_setting(struct usbnet *dev) +{ + u8 ledfd, value = 0; + u16 tmp, ledact, ledlink, ledvalue = 0, delay = HZ / 10; + unsigned long jtimeout; + + /* Check AX88179 version. UA1 or UA2*/ + ax88179_read_cmd(dev, AX_ACCESS_MAC, GENERAL_STATUS, 1, 1, &value); + + if (!(value & AX_SECLD)) { /* UA1 */ + value = AX_GPIO_CTRL_GPIO3EN | AX_GPIO_CTRL_GPIO2EN | + AX_GPIO_CTRL_GPIO1EN; + if (ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_GPIO_CTRL, + 1, 1, &value) < 0) + return -EINVAL; + } + + /* Check EEPROM */ + if (!ax88179_check_eeprom(dev)) { + value = 0x42; + if (ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_SROM_ADDR, + 1, 1, &value) < 0) + return -EINVAL; + + value = EEP_RD; + if (ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_SROM_CMD, + 1, 1, &value) < 0) + return -EINVAL; + + jtimeout = jiffies + delay; + do { + ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_SROM_CMD, + 1, 1, &value); + + if (time_after(jiffies, jtimeout)) + return -EINVAL; + + } while (value & EEP_BUSY); + + ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_SROM_DATA_HIGH, + 1, 1, &value); + ledvalue = (value << 8); + + ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_SROM_DATA_LOW, + 1, 1, &value); + ledvalue |= value; + + /* load internal ROM for defaule setting */ + if ((ledvalue == 0xFFFF) || ((ledvalue & LED_VALID) == 0)) + ax88179_convert_old_led(dev, &ledvalue); + + } else if (!ax88179_check_efuse(dev, &ledvalue)) { + if ((ledvalue == 0xFFFF) || ((ledvalue & LED_VALID) == 0)) + ax88179_convert_old_led(dev, &ledvalue); + } else { + ax88179_convert_old_led(dev, &ledvalue); + } + + tmp = GMII_PHY_PGSEL_EXT; + ax88179_write_cmd(dev, AX_ACCESS_PHY, AX88179_PHY_ID, + GMII_PHY_PAGE_SELECT, 2, &tmp); + + tmp = 0x2c; + ax88179_write_cmd(dev, AX_ACCESS_PHY, AX88179_PHY_ID, + GMII_PHYPAGE, 2, &tmp); + + ax88179_read_cmd(dev, AX_ACCESS_PHY, AX88179_PHY_ID, + GMII_LED_ACT, 2, &ledact); + + ax88179_read_cmd(dev, AX_ACCESS_PHY, AX88179_PHY_ID, + GMII_LED_LINK, 2, &ledlink); + + ledact &= GMII_LED_ACTIVE_MASK; + ledlink &= GMII_LED_LINK_MASK; + + if (ledvalue & LED0_ACTIVE) + ledact |= GMII_LED0_ACTIVE; + + if (ledvalue & LED1_ACTIVE) + ledact |= GMII_LED1_ACTIVE; + + if (ledvalue & LED2_ACTIVE) + ledact |= GMII_LED2_ACTIVE; + + if (ledvalue & LED0_LINK_10) + ledlink |= GMII_LED0_LINK_10; + + if (ledvalue & LED1_LINK_10) + ledlink |= GMII_LED1_LINK_10; + + if (ledvalue & LED2_LINK_10) + ledlink |= GMII_LED2_LINK_10; + + if (ledvalue & LED0_LINK_100) + ledlink |= GMII_LED0_LINK_100; + + if (ledvalue & LED1_LINK_100) + ledlink |= GMII_LED1_LINK_100; + + if (ledvalue & LED2_LINK_100) + ledlink |= GMII_LED2_LINK_100; + + if (ledvalue & LED0_LINK_1000) + ledlink |= GMII_LED0_LINK_1000; + + if (ledvalue & LED1_LINK_1000) + ledlink |= GMII_LED1_LINK_1000; + + if (ledvalue & LED2_LINK_1000) + ledlink |= GMII_LED2_LINK_1000; + + tmp = ledact; + ax88179_write_cmd(dev, AX_ACCESS_PHY, AX88179_PHY_ID, + GMII_LED_ACT, 2, &tmp); + + tmp = ledlink; + ax88179_write_cmd(dev, AX_ACCESS_PHY, AX88179_PHY_ID, + GMII_LED_LINK, 2, &tmp); + + tmp = GMII_PHY_PGSEL_PAGE0; + ax88179_write_cmd(dev, AX_ACCESS_PHY, AX88179_PHY_ID, + GMII_PHY_PAGE_SELECT, 2, &tmp); + + /* LED full duplex setting */ + ledfd = 0; + if (ledvalue & LED0_FD) + ledfd |= 0x01; + else if ((ledvalue & LED0_USB3_MASK) == 0) + ledfd |= 0x02; + + if (ledvalue & LED1_FD) + ledfd |= 0x04; + else if ((ledvalue & LED1_USB3_MASK) == 0) + ledfd |= 0x08; + + if (ledvalue & LED2_FD) + ledfd |= 0x10; + else if ((ledvalue & 
LED2_USB3_MASK) == 0) + ledfd |= 0x20; + + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_LEDCTRL, 1, 1, &ledfd); + + return 0; +} + +static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) +{ + u8 buf[5]; + u16 *tmp16; + u8 *tmp; + struct ax88179_data *ax179_data = (struct ax88179_data *)dev->data; + + usbnet_get_endpoints(dev, intf); + + tmp16 = (u16 *)buf; + tmp = (u8 *)buf; + + memset(ax179_data, 0, sizeof(*ax179_data)); + + /* Power up ethernet PHY */ + *tmp16 = 0; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, 2, 2, tmp16); + *tmp16 = AX_PHYPWR_RSTCTL_IPRL; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, 2, 2, tmp16); + msleep(200); + + *tmp = AX_CLK_SELECT_ACS | AX_CLK_SELECT_BCS; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, tmp); + msleep(100); + + ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN, + ETH_ALEN, dev->net->dev_addr); + memcpy(dev->net->perm_addr, dev->net->dev_addr, ETH_ALEN); + + /* RX bulk configuration */ + memcpy(tmp, &AX88179_BULKIN_SIZE[0], 5); + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RX_BULKIN_QCTRL, 5, 5, tmp); + + dev->rx_urb_size = 1024 * 20; + + *tmp = 0x34; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PAUSE_WATERLVL_LOW, 1, 1, tmp); + + *tmp = 0x52; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PAUSE_WATERLVL_HIGH, + 1, 1, tmp); + + dev->net->netdev_ops = &ax88179_netdev_ops; + dev->net->ethtool_ops = &ax88179_ethtool_ops; + dev->net->needed_headroom = 8; + + /* Initialize MII structure */ + dev->mii.dev = dev->net; + dev->mii.mdio_read = ax88179_mdio_read; + dev->mii.mdio_write = ax88179_mdio_write; + dev->mii.phy_id_mask = 0xff; + dev->mii.reg_num_mask = 0xff; + dev->mii.phy_id = 0x03; + dev->mii.supports_gmii = 1; + + dev->net->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_TSO; + + dev->net->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_TSO; + + /* Enable checksum offload */ + *tmp = AX_RXCOE_IP | AX_RXCOE_TCP | AX_RXCOE_UDP | + AX_RXCOE_TCPV6 | AX_RXCOE_UDPV6; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RXCOE_CTL, 1, 1, tmp); + + *tmp = AX_TXCOE_IP | AX_TXCOE_TCP | AX_TXCOE_UDP | + AX_TXCOE_TCPV6 | AX_TXCOE_UDPV6; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_TXCOE_CTL, 1, 1, tmp); + + /* Configure RX control register => start operation */ + *tmp16 = AX_RX_CTL_DROPCRCERR | AX_RX_CTL_IPE | AX_RX_CTL_START | + AX_RX_CTL_AP | AX_RX_CTL_AMALL | AX_RX_CTL_AB; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RX_CTL, 2, 2, tmp16); + + *tmp = AX_MONITOR_MODE_PMETYPE | AX_MONITOR_MODE_PMEPOL | + AX_MONITOR_MODE_RWMP; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MONITOR_MOD, 1, 1, tmp); + + /* Configure default medium type => giga */ + *tmp16 = AX_MEDIUM_RECEIVE_EN | AX_MEDIUM_TXFLOW_CTRLEN | + AX_MEDIUM_RXFLOW_CTRLEN | AX_MEDIUM_ALWAYS_ONE | + AX_MEDIUM_FULL_DUPLEX | AX_MEDIUM_GIGAMODE; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE, + 2, 2, tmp16); + + ax88179_led_setting(dev); + + /* Restart autoneg */ + mii_nway_restart(&dev->mii); + + netif_carrier_off(dev->net); + + return 0; +} + +static void ax88179_unbind(struct usbnet *dev, struct usb_interface *intf) +{ + u16 tmp16; + + /* Configure RX control register => stop operation */ + tmp16 = AX_RX_CTL_STOP; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RX_CTL, 2, 2, &tmp16); + + tmp16 = 0; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, &tmp16); + + /* Power down ethernet PHY */ + tmp16 = 0; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, 2, 2, 
&tmp16); +} + +static void +ax88179_rx_checksum(struct sk_buff *skb, u32 *pkt_hdr) +{ + skb->ip_summed = CHECKSUM_NONE; + + /* checksum error bit is set */ + if ((*pkt_hdr & AX_RXHDR_L3CSUM_ERR) || + (*pkt_hdr & AX_RXHDR_L4CSUM_ERR)) + return; + + /* It must be a TCP or UDP packet with a valid checksum */ + if (((*pkt_hdr & AX_RXHDR_L4_TYPE_MASK) == AX_RXHDR_L4_TYPE_TCP) || + ((*pkt_hdr & AX_RXHDR_L4_TYPE_MASK) == AX_RXHDR_L4_TYPE_UDP)) + skb->ip_summed = CHECKSUM_UNNECESSARY; +} + +static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb) +{ + struct sk_buff *ax_skb; + int pkt_cnt; + u32 rx_hdr; + u16 hdr_off; + u32 *pkt_hdr; + + skb_trim(skb, skb->len - 4); + memcpy(&rx_hdr, skb_tail_pointer(skb), 4); + le32_to_cpus(&rx_hdr); + + pkt_cnt = (u16)rx_hdr; + hdr_off = (u16)(rx_hdr >> 16); + pkt_hdr = (u32 *)(skb->data + hdr_off); + + while (pkt_cnt--) { + u16 pkt_len; + + le32_to_cpus(pkt_hdr); + pkt_len = (*pkt_hdr >> 16) & 0x1fff; + + /* Check CRC or runt packet */ + if ((*pkt_hdr & AX_RXHDR_CRC_ERR) || + (*pkt_hdr & AX_RXHDR_DROP_ERR)) { + skb_pull(skb, (pkt_len + 7) & 0xFFF8); + pkt_hdr++; + continue; + } + + if (pkt_cnt == 0) { + /* Skip IP alignment psudo header */ + skb_pull(skb, 2); + skb->len = pkt_len; + skb_set_tail_pointer(skb, pkt_len); + skb->truesize = pkt_len + sizeof(struct sk_buff); + ax88179_rx_checksum(skb, pkt_hdr); + return 1; + } + + ax_skb = skb_clone(skb, GFP_ATOMIC); + if (ax_skb) { + ax_skb->len = pkt_len; + ax_skb->data = skb->data + 2; + skb_set_tail_pointer(ax_skb, pkt_len); + ax_skb->truesize = pkt_len + sizeof(struct sk_buff); + ax88179_rx_checksum(ax_skb, pkt_hdr); + usbnet_skb_return(dev, ax_skb); + } else { + return 0; + } + + skb_pull(skb, (pkt_len + 7) & 0xFFF8); + pkt_hdr++; + } + return 1; +} + +static struct sk_buff * +ax88179_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) +{ + u32 tx_hdr1, tx_hdr2; + int frame_size = dev->maxpacket; + int mss = skb_shinfo(skb)->gso_size; + int headroom; + int tailroom; + + tx_hdr1 = skb->len; + tx_hdr2 = mss; + if (((skb->len + 8) % frame_size) == 0) + tx_hdr2 |= 0x80008000; /* Enable padding */ + + skb_linearize(skb); + headroom = skb_headroom(skb); + tailroom = skb_tailroom(skb); + + if (!skb_header_cloned(skb) && + !skb_cloned(skb) && + (headroom + tailroom) >= 8) { + if (headroom < 8) { + skb->data = memmove(skb->head + 8, skb->data, skb->len); + skb_set_tail_pointer(skb, skb->len); + } + } else { + struct sk_buff *skb2; + + skb2 = skb_copy_expand(skb, 8, 0, flags); + dev_kfree_skb_any(skb); + skb = skb2; + if (!skb) + return NULL; + } + + skb_push(skb, 4); + cpu_to_le32s(&tx_hdr2); + skb_copy_to_linear_data(skb, &tx_hdr2, 4); + + skb_push(skb, 4); + cpu_to_le32s(&tx_hdr1); + skb_copy_to_linear_data(skb, &tx_hdr1, 4); + + return skb; +} + +static int ax88179_link_reset(struct usbnet *dev) +{ + struct ax88179_data *ax179_data = (struct ax88179_data *)dev->data; + u8 tmp[5], link_sts; + u16 mode, tmp16, delay = HZ / 10; + u32 tmp32 = 0x40000000; + unsigned long jtimeout; + + jtimeout = jiffies + delay; + while (tmp32 & 0x40000000) { + mode = 0; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RX_CTL, 2, 2, &mode); + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RX_CTL, 2, 2, + &ax179_data->rxctl); + + /*link up, check the usb device control TX FIFO full or empty*/ + ax88179_read_cmd(dev, 0x81, 0x8c, 0, 4, &tmp32); + + if (time_after(jiffies, jtimeout)) + return 0; + } + + mode = AX_MEDIUM_RECEIVE_EN | AX_MEDIUM_TXFLOW_CTRLEN | + AX_MEDIUM_RXFLOW_CTRLEN | AX_MEDIUM_ALWAYS_ONE; + + 
ax88179_read_cmd(dev, AX_ACCESS_MAC, PHYSICAL_LINK_STATUS, + 1, 1, &link_sts); + + ax88179_read_cmd(dev, AX_ACCESS_PHY, AX88179_PHY_ID, + GMII_PHY_PHYSR, 2, &tmp16); + + if (!(tmp16 & GMII_PHY_PHYSR_LINK)) { + return 0; + } else if (GMII_PHY_PHYSR_GIGA == (tmp16 & GMII_PHY_PHYSR_SMASK)) { + mode |= AX_MEDIUM_GIGAMODE | AX_MEDIUM_EN_125MHZ; + if (dev->net->mtu > 1500) + mode |= AX_MEDIUM_JUMBO_EN; + + if (link_sts & AX_USB_SS) + memcpy(tmp, &AX88179_BULKIN_SIZE[0], 5); + else if (link_sts & AX_USB_HS) + memcpy(tmp, &AX88179_BULKIN_SIZE[1], 5); + else + memcpy(tmp, &AX88179_BULKIN_SIZE[3], 5); + } else if (GMII_PHY_PHYSR_100 == (tmp16 & GMII_PHY_PHYSR_SMASK)) { + mode |= AX_MEDIUM_PS; + + if (link_sts & (AX_USB_SS | AX_USB_HS)) + memcpy(tmp, &AX88179_BULKIN_SIZE[2], 5); + else + memcpy(tmp, &AX88179_BULKIN_SIZE[3], 5); + } else { + memcpy(tmp, &AX88179_BULKIN_SIZE[3], 5); + } + + /* RX bulk configuration */ + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RX_BULKIN_QCTRL, 5, 5, tmp); + + dev->rx_urb_size = (1024 * (tmp[3] + 2)); + + if (tmp16 & GMII_PHY_PHYSR_FULL) + mode |= AX_MEDIUM_FULL_DUPLEX; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE, + 2, 2, &mode); + + netif_carrier_on(dev->net); + + return 0; +} + +static int ax88179_reset(struct usbnet *dev) +{ + u8 buf[5]; + u16 *tmp16; + u8 *tmp; + + tmp16 = (u16 *)buf; + tmp = (u8 *)buf; + + /* Power up ethernet PHY */ + *tmp16 = 0; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, 2, 2, tmp16); + + *tmp16 = AX_PHYPWR_RSTCTL_IPRL; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, 2, 2, tmp16); + msleep(200); + + *tmp = AX_CLK_SELECT_ACS | AX_CLK_SELECT_BCS; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, tmp); + msleep(100); + + /* Ethernet PHY Auto Detach*/ + ax88179_auto_detach(dev, 0); + + ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN, ETH_ALEN, + dev->net->dev_addr); + memcpy(dev->net->perm_addr, dev->net->dev_addr, ETH_ALEN); + + /* RX bulk configuration */ + memcpy(tmp, &AX88179_BULKIN_SIZE[0], 5); + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RX_BULKIN_QCTRL, 5, 5, tmp); + + dev->rx_urb_size = 1024 * 20; + + *tmp = 0x34; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PAUSE_WATERLVL_LOW, 1, 1, tmp); + + *tmp = 0x52; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PAUSE_WATERLVL_HIGH, + 1, 1, tmp); + + dev->net->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_TSO; + + dev->net->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_TSO; + + /* Enable checksum offload */ + *tmp = AX_RXCOE_IP | AX_RXCOE_TCP | AX_RXCOE_UDP | + AX_RXCOE_TCPV6 | AX_RXCOE_UDPV6; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RXCOE_CTL, 1, 1, tmp); + + *tmp = AX_TXCOE_IP | AX_TXCOE_TCP | AX_TXCOE_UDP | + AX_TXCOE_TCPV6 | AX_TXCOE_UDPV6; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_TXCOE_CTL, 1, 1, tmp); + + /* Configure RX control register => start operation */ + *tmp16 = AX_RX_CTL_DROPCRCERR | AX_RX_CTL_IPE | AX_RX_CTL_START | + AX_RX_CTL_AP | AX_RX_CTL_AMALL | AX_RX_CTL_AB; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RX_CTL, 2, 2, tmp16); + + *tmp = AX_MONITOR_MODE_PMETYPE | AX_MONITOR_MODE_PMEPOL | + AX_MONITOR_MODE_RWMP; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MONITOR_MOD, 1, 1, tmp); + + /* Configure default medium type => giga */ + *tmp16 = AX_MEDIUM_RECEIVE_EN | AX_MEDIUM_TXFLOW_CTRLEN | + AX_MEDIUM_RXFLOW_CTRLEN | AX_MEDIUM_ALWAYS_ONE | + AX_MEDIUM_FULL_DUPLEX | AX_MEDIUM_GIGAMODE; + ax88179_write_cmd(dev, AX_ACCESS_MAC, 
AX_MEDIUM_STATUS_MODE, + 2, 2, tmp16); + + ax88179_led_setting(dev); + + /* Restart autoneg */ + mii_nway_restart(&dev->mii); + + netif_carrier_off(dev->net); + + return 0; +} + +static int ax88179_stop(struct usbnet *dev) +{ + u16 tmp16; + + ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE, + 2, 2, &tmp16); + tmp16 &= ~AX_MEDIUM_RECEIVE_EN; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE, + 2, 2, &tmp16); + + return 0; +} + +static const struct driver_info ax88179_info = { + .description = "ASIX AX88179 USB 3.0 Gigibit Ethernet", + .bind = ax88179_bind, + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, + .reset = ax88179_reset, + .stop = ax88179_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .rx_fixup = ax88179_rx_fixup, + .tx_fixup = ax88179_tx_fixup, +}; + +static const struct driver_info ax88178a_info = { + .description = "ASIX AX88178A USB 2.0 Gigibit Ethernet", + .bind = ax88179_bind, + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, + .reset = ax88179_reset, + .stop = ax88179_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .rx_fixup = ax88179_rx_fixup, + .tx_fixup = ax88179_tx_fixup, +}; + +static const struct driver_info sitecom_info = { + .description = "Sitecom USB 3.0 to Gigabit Adapter", + .bind = ax88179_bind, + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, + .reset = ax88179_reset, + .stop = ax88179_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .rx_fixup = ax88179_rx_fixup, + .tx_fixup = ax88179_tx_fixup, +}; + +static const struct usb_device_id products[] = { +{ + /* ASIX AX88179 10/100/1000 */ + USB_DEVICE(0x0b95, 0x1790), + .driver_info = (unsigned long)&ax88179_info, +}, { + /* ASIX AX88178A 10/100/1000 */ + USB_DEVICE(0x0b95, 0x178a), + .driver_info = (unsigned long)&ax88178a_info, +}, { + /* Sitecom USB 3.0 to Gigabit Adapter */ + USB_DEVICE(0x0df6, 0x0072), + .driver_info = (unsigned long) &sitecom_info, +}, + { }, +}; +MODULE_DEVICE_TABLE(usb, products); + +static struct usb_driver ax88179_178a_driver = { + .name = "ax88179_178a", + .id_table = products, + .probe = usbnet_probe, + .suspend = ax88179_suspend, + .resume = ax88179_resume, + .disconnect = usbnet_disconnect, + .supports_autosuspend = 1, + .disable_hub_initiated_lpm = 1, +}; + +module_usb_driver(ax88179_178a_driver); + +MODULE_DESCRIPTION("ASIX AX88179/178A based USB 3.0/2.0 Gigabit Ethernet Devices"); +MODULE_LICENSE("GPL"); From d6e89c0b7660bd725a0948126f9db1a327f48d76 Mon Sep 17 00:00:00 2001 From: Silviu-Mihai Popescu Date: Sat, 2 Mar 2013 09:45:19 +0000 Subject: [PATCH 091/261] caif_dev: fix sparse warnings for caif_flow_cb This fixed the following sparse warning: net/caif/caif_dev.c:121:6: warning: symbol 'caif_flow_cb' was not declared. Should it be static? Signed-off-by: Silviu-Mihai Popescu Signed-off-by: David S. 
Miller --- net/caif/caif_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 1ae1d9cb278d..21760f008974 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -118,7 +118,7 @@ static struct caif_device_entry *caif_get(struct net_device *dev) return NULL; } -void caif_flow_cb(struct sk_buff *skb) +static void caif_flow_cb(struct sk_buff *skb) { struct caif_device_entry *caifd; void (*dtor)(struct sk_buff *skb) = NULL; From 15516f123a849084c6bb8dec40f2acb2ce69d639 Mon Sep 17 00:00:00 2001 From: Cesar Eduardo Barros Date: Sat, 2 Mar 2013 15:53:39 +0000 Subject: [PATCH 092/261] MAINTAINERS: remove 3c505 This driver was removed by commit 0e245db (drivers/net: delete the 3Com 3c505/3c507 intel i825xx support). Cc: Paul Gortmaker Cc: Philip Blundell Cc: netdev@vger.kernel.org Signed-off-by: Cesar Eduardo Barros Signed-off-by: David S. Miller --- MAINTAINERS | 6 ------ 1 file changed, 6 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1e5c3a4ece7b..86db49ec28d0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -113,12 +113,6 @@ Maintainers List (try to look for most precise areas first) ----------------------------------- -3C505 NETWORK DRIVER -M: Philip Blundell -L: netdev@vger.kernel.org -S: Maintained -F: drivers/net/ethernet/i825xx/3c505* - 3C59X NETWORK DRIVER M: Steffen Klassert L: netdev@vger.kernel.org From 6b8a12bab337702c804deba9a1860ac042f69002 Mon Sep 17 00:00:00 2001 From: Cesar Eduardo Barros Date: Sat, 2 Mar 2013 15:53:43 +0000 Subject: [PATCH 093/261] MAINTAINERS: remove drivers/net/wan/cycx* This driver was removed by commit 6fcdf4f (wanrouter: delete now orphaned header content, files/drivers). Cc: Paul Gortmaker Cc: Arnaldo Carvalho de Melo Cc: netdev@vger.kernel.org Signed-off-by: Cesar Eduardo Barros Signed-off-by: David S. Miller --- MAINTAINERS | 6 ------ 1 file changed, 6 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 86db49ec28d0..7ca15c56ee1b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2353,12 +2353,6 @@ W: http://www.arm.linux.org.uk/ S: Maintained F: drivers/video/cyber2000fb.* -CYCLADES 2X SYNC CARD DRIVER -M: Arnaldo Carvalho de Melo -W: http://oops.ghostprotocols.net:81/blog -S: Maintained -F: drivers/net/wan/cycx* - CYCLADES ASYNC MUX DRIVER W: http://www.cyclades.com/ S: Orphan From b711c12113620081708c6ccd43768b5a2501e2ab Mon Sep 17 00:00:00 2001 From: Cesar Eduardo Barros Date: Sat, 2 Mar 2013 15:53:45 +0000 Subject: [PATCH 094/261] MAINTAINERS: remove eexpress This driver was removed by commit f84932d (drivers/net: delete ISA intel eexpress and eepro i825xx drivers). Cc: Paul Gortmaker Cc: Philip Blundell Cc: netdev@vger.kernel.org Signed-off-by: Cesar Eduardo Barros Signed-off-by: David S. 
Miller --- MAINTAINERS | 6 ------ 1 file changed, 6 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7ca15c56ee1b..f79abf0f0122 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3046,12 +3046,6 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kristoffer/linux-hpc.git F: drivers/video/s1d13xxxfb.c F: include/video/s1d13xxxfb.h -ETHEREXPRESS-16 NETWORK DRIVER -M: Philip Blundell -L: netdev@vger.kernel.org -S: Maintained -F: drivers/net/ethernet/i825xx/eexpress.* - ETHERNET BRIDGE M: Stephen Hemminger L: bridge@lists.linux-foundation.org From 51cd02d43c0bc7f55c84f50de23c08554db56ce1 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 3 Mar 2013 16:20:50 +0800 Subject: [PATCH 095/261] ASoC: wm8350: Use jiffies rather than msecs in schedule_delayed_work() The delay parameter of schedule_delayed_work() is number of jiffies to wait rather than miliseconds. Before commit 6d3c26bcb "ASoC: Use delayed work to debounce WM8350 jack IRQs", the debounce time is 200 miliseconds in wm8350_hp_jack_handler(). So I think this is a bug when convert to use delayed work. Signed-off-by: Axel Lin Signed-off-by: Mark Brown --- sound/soc/codecs/wm8350.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c index fb92fb47d636..1db957d54766 100644 --- a/sound/soc/codecs/wm8350.c +++ b/sound/soc/codecs/wm8350.c @@ -1303,7 +1303,7 @@ static irqreturn_t wm8350_hpl_jack_handler(int irq, void *data) if (device_may_wakeup(wm8350->dev)) pm_wakeup_event(wm8350->dev, 250); - schedule_delayed_work(&priv->hpl.work, 200); + schedule_delayed_work(&priv->hpl.work, msecs_to_jiffies(200)); return IRQ_HANDLED; } @@ -1320,7 +1320,7 @@ static irqreturn_t wm8350_hpr_jack_handler(int irq, void *data) if (device_may_wakeup(wm8350->dev)) pm_wakeup_event(wm8350->dev, 250); - schedule_delayed_work(&priv->hpr.work, 200); + schedule_delayed_work(&priv->hpr.work, msecs_to_jiffies(200)); return IRQ_HANDLED; } From f3e227df8235fc0bf8ba08304aa135066ec9b9b0 Mon Sep 17 00:00:00 2001 From: Syam Sidhardhan Date: Mon, 25 Feb 2013 04:05:38 +0530 Subject: [PATCH 096/261] drm/i915: Fix missing variable initilization Need to initialize the variable wait to false. Signed-off-by: Syam Sidhardhan Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 816c45c71b72..24debd6066a7 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1391,8 +1391,8 @@ void intel_ddi_prepare_link_retrain(struct drm_encoder *encoder) struct intel_dp *intel_dp = &intel_dig_port->dp; struct drm_i915_private *dev_priv = encoder->dev->dev_private; enum port port = intel_dig_port->port; - bool wait; uint32_t val; + bool wait = false; if (I915_READ(DP_TP_CTL(port)) & DP_TP_CTL_ENABLE) { val = I915_READ(DDI_BUF_CTL(port)); From b18ac466956c7e7b5abf7a2d6adf8c626267d0ae Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Mon, 18 Feb 2013 19:00:24 -0300 Subject: [PATCH 097/261] drm/i915: wait_event_timeout's timeout is in jiffies So use msecs_to_jiffies(10) to make the timeout the same as in the "!has_aux_irq" case. This patch was initially written by Daniel Vetter and posted on pastebin a few weeks ago. I'm just bringing it to the mailing list. 
Signed-off-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_dp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 7b8bfe8982e6..50cd7ac28c2c 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -353,7 +353,8 @@ intel_dp_aux_wait_done(struct intel_dp *intel_dp, bool has_aux_irq) #define C (((status = I915_READ_NOTRACE(ch_ctl)) & DP_AUX_CH_CTL_SEND_BUSY) == 0) if (has_aux_irq) - done = wait_event_timeout(dev_priv->gmbus_wait_queue, C, 10); + done = wait_event_timeout(dev_priv->gmbus_wait_queue, C, + msecs_to_jiffies(10)); else done = wait_for_atomic(C, 10) == 0; if (!done) From 4a35f83b2b7c6aae3fc0d1c4554fdc99dc33ad07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 22 Feb 2013 16:53:38 +0200 Subject: [PATCH 098/261] drm/i915: Don't clobber crtc->fb when queue_flip fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restore crtc->fb to the old framebuffer if queue_flip fails. While at it, kill the pointless intel_fb temp variable. v2: Update crtc->fb before queue_flip and restore it back after a failure. Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Reported-and-Tested-by: Mika Kuoppala Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0ff10b3af9ea..09659ff6d24d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7264,8 +7264,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_framebuffer *intel_fb; - struct drm_i915_gem_object *obj; + struct drm_framebuffer *old_fb = crtc->fb; + struct drm_i915_gem_object *obj = to_intel_framebuffer(fb)->obj; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_unpin_work *work; unsigned long flags; @@ -7290,8 +7290,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, work->event = event; work->crtc = crtc; - intel_fb = to_intel_framebuffer(crtc->fb); - work->old_fb_obj = intel_fb->obj; + work->old_fb_obj = to_intel_framebuffer(old_fb)->obj; INIT_WORK(&work->work, intel_unpin_work_fn); ret = drm_vblank_get(dev, intel_crtc->pipe); @@ -7311,9 +7310,6 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, intel_crtc->unpin_work = work; spin_unlock_irqrestore(&dev->event_lock, flags); - intel_fb = to_intel_framebuffer(fb); - obj = intel_fb->obj; - if (atomic_read(&intel_crtc->unpin_work_count) >= 2) flush_workqueue(dev_priv->wq); @@ -7348,6 +7344,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, cleanup_pending: atomic_dec(&intel_crtc->unpin_work_count); + crtc->fb = old_fb; drm_gem_object_unreference(&work->old_fb_obj->base); drm_gem_object_unreference(&obj->base); mutex_unlock(&dev->struct_mutex); From 86c268ed0f9b3b4d51d81dd8fcec533a164414d1 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 1 Mar 2013 17:00:50 -0800 Subject: [PATCH 099/261] drm/i915: Fix Haswell/CRW PCI IDs. The second digit was off by one, which meant we accidentally treated GT(n) as GT(n-1). This also meant no support for GT1 at all. 
Cc: stable@kernel.org Signed-off-by: Kenneth Graunke Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c5b8c81b9440..2c5ee965e473 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -379,15 +379,15 @@ static const struct pci_device_id pciidlist[] = { /* aka */ INTEL_VGA_DEVICE(0x0A06, &intel_haswell_m_info), /* ULT GT1 mobile */ INTEL_VGA_DEVICE(0x0A16, &intel_haswell_m_info), /* ULT GT2 mobile */ INTEL_VGA_DEVICE(0x0A26, &intel_haswell_m_info), /* ULT GT2 mobile */ - INTEL_VGA_DEVICE(0x0D12, &intel_haswell_d_info), /* CRW GT1 desktop */ + INTEL_VGA_DEVICE(0x0D02, &intel_haswell_d_info), /* CRW GT1 desktop */ + INTEL_VGA_DEVICE(0x0D12, &intel_haswell_d_info), /* CRW GT2 desktop */ INTEL_VGA_DEVICE(0x0D22, &intel_haswell_d_info), /* CRW GT2 desktop */ - INTEL_VGA_DEVICE(0x0D32, &intel_haswell_d_info), /* CRW GT2 desktop */ - INTEL_VGA_DEVICE(0x0D1A, &intel_haswell_d_info), /* CRW GT1 server */ + INTEL_VGA_DEVICE(0x0D0A, &intel_haswell_d_info), /* CRW GT1 server */ + INTEL_VGA_DEVICE(0x0D1A, &intel_haswell_d_info), /* CRW GT2 server */ INTEL_VGA_DEVICE(0x0D2A, &intel_haswell_d_info), /* CRW GT2 server */ - INTEL_VGA_DEVICE(0x0D3A, &intel_haswell_d_info), /* CRW GT2 server */ - INTEL_VGA_DEVICE(0x0D16, &intel_haswell_m_info), /* CRW GT1 mobile */ + INTEL_VGA_DEVICE(0x0D06, &intel_haswell_m_info), /* CRW GT1 mobile */ + INTEL_VGA_DEVICE(0x0D16, &intel_haswell_m_info), /* CRW GT2 mobile */ INTEL_VGA_DEVICE(0x0D26, &intel_haswell_m_info), /* CRW GT2 mobile */ - INTEL_VGA_DEVICE(0x0D36, &intel_haswell_m_info), /* CRW GT2 mobile */ INTEL_VGA_DEVICE(0x0f30, &intel_valleyview_m_info), INTEL_VGA_DEVICE(0x0157, &intel_valleyview_m_info), INTEL_VGA_DEVICE(0x0155, &intel_valleyview_d_info), From 30a1b5ef0c953a7ba82169c46f92f52fa11ee15e Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 8 Feb 2013 23:02:33 +0100 Subject: [PATCH 100/261] ARM: 7642/1: netx: bump IRQ offset to 64 The Netx IRQs offset from zero, which is illegal, since Linux IRQ 0 is NO_IRQ. 
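For background (not part of the patch; the EXAMPLE_* macros and example_probe_irq() are made-up names): the kernel reserves Linux IRQ 0 to mean "no interrupt", so a controller whose hardware sources are numbered from 0 has to be registered at a nonzero Linux IRQ base, with its interrupt defines expressed relative to that base, roughly like this:

#include <linux/errno.h>

#define EXAMPLE_IRQ_VIC_START	64	/* any nonzero base works */
#define EXAMPLE_IRQ_TIMER0	(EXAMPLE_IRQ_VIC_START + 1)
#define EXAMPLE_IRQ_UART0	(EXAMPLE_IRQ_VIC_START + 8)

static int example_probe_irq(int irq)
{
	if (!irq)	/* 0 still means "this device has no interrupt" */
		return -ENXIO;
	return 0;	/* nonzero numbers are safe to hand to request_irq() */
}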
Acked-by: Sascha Hauer Reviewed-by: Jamie Iles Signed-off-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/mach-netx/generic.c | 2 +- arch/arm/mach-netx/include/mach/irqs.h | 64 +++++++++++++------------- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/arch/arm/mach-netx/generic.c b/arch/arm/mach-netx/generic.c index 27c2cb7ab813..1504b68f4c66 100644 --- a/arch/arm/mach-netx/generic.c +++ b/arch/arm/mach-netx/generic.c @@ -168,7 +168,7 @@ void __init netx_init_irq(void) { int irq; - vic_init(io_p2v(NETX_PA_VIC), 0, ~0, 0); + vic_init(io_p2v(NETX_PA_VIC), NETX_IRQ_VIC_START, ~0, 0); for (irq = NETX_IRQ_HIF_CHAINED(0); irq <= NETX_IRQ_HIF_LAST; irq++) { irq_set_chip_and_handler(irq, &netx_hif_chip, diff --git a/arch/arm/mach-netx/include/mach/irqs.h b/arch/arm/mach-netx/include/mach/irqs.h index 6ce914d54a30..8f74a844a775 100644 --- a/arch/arm/mach-netx/include/mach/irqs.h +++ b/arch/arm/mach-netx/include/mach/irqs.h @@ -17,42 +17,42 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#define NETX_IRQ_VIC_START 0 -#define NETX_IRQ_SOFTINT 0 -#define NETX_IRQ_TIMER0 1 -#define NETX_IRQ_TIMER1 2 -#define NETX_IRQ_TIMER2 3 -#define NETX_IRQ_SYSTIME_NS 4 -#define NETX_IRQ_SYSTIME_S 5 -#define NETX_IRQ_GPIO_15 6 -#define NETX_IRQ_WATCHDOG 7 -#define NETX_IRQ_UART0 8 -#define NETX_IRQ_UART1 9 -#define NETX_IRQ_UART2 10 -#define NETX_IRQ_USB 11 -#define NETX_IRQ_SPI 12 -#define NETX_IRQ_I2C 13 -#define NETX_IRQ_LCD 14 -#define NETX_IRQ_HIF 15 -#define NETX_IRQ_GPIO_0_14 16 -#define NETX_IRQ_XPEC0 17 -#define NETX_IRQ_XPEC1 18 -#define NETX_IRQ_XPEC2 19 -#define NETX_IRQ_XPEC3 20 -#define NETX_IRQ_XPEC(no) (17 + (no)) -#define NETX_IRQ_MSYNC0 21 -#define NETX_IRQ_MSYNC1 22 -#define NETX_IRQ_MSYNC2 23 -#define NETX_IRQ_MSYNC3 24 -#define NETX_IRQ_IRQ_PHY 25 -#define NETX_IRQ_ISO_AREA 26 +#define NETX_IRQ_VIC_START 64 +#define NETX_IRQ_SOFTINT (NETX_IRQ_VIC_START + 0) +#define NETX_IRQ_TIMER0 (NETX_IRQ_VIC_START + 1) +#define NETX_IRQ_TIMER1 (NETX_IRQ_VIC_START + 2) +#define NETX_IRQ_TIMER2 (NETX_IRQ_VIC_START + 3) +#define NETX_IRQ_SYSTIME_NS (NETX_IRQ_VIC_START + 4) +#define NETX_IRQ_SYSTIME_S (NETX_IRQ_VIC_START + 5) +#define NETX_IRQ_GPIO_15 (NETX_IRQ_VIC_START + 6) +#define NETX_IRQ_WATCHDOG (NETX_IRQ_VIC_START + 7) +#define NETX_IRQ_UART0 (NETX_IRQ_VIC_START + 8) +#define NETX_IRQ_UART1 (NETX_IRQ_VIC_START + 9) +#define NETX_IRQ_UART2 (NETX_IRQ_VIC_START + 10) +#define NETX_IRQ_USB (NETX_IRQ_VIC_START + 11) +#define NETX_IRQ_SPI (NETX_IRQ_VIC_START + 12) +#define NETX_IRQ_I2C (NETX_IRQ_VIC_START + 13) +#define NETX_IRQ_LCD (NETX_IRQ_VIC_START + 14) +#define NETX_IRQ_HIF (NETX_IRQ_VIC_START + 15) +#define NETX_IRQ_GPIO_0_14 (NETX_IRQ_VIC_START + 16) +#define NETX_IRQ_XPEC0 (NETX_IRQ_VIC_START + 17) +#define NETX_IRQ_XPEC1 (NETX_IRQ_VIC_START + 18) +#define NETX_IRQ_XPEC2 (NETX_IRQ_VIC_START + 19) +#define NETX_IRQ_XPEC3 (NETX_IRQ_VIC_START + 20) +#define NETX_IRQ_XPEC(no) (NETX_IRQ_VIC_START + 17 + (no)) +#define NETX_IRQ_MSYNC0 (NETX_IRQ_VIC_START + 21) +#define NETX_IRQ_MSYNC1 (NETX_IRQ_VIC_START + 22) +#define NETX_IRQ_MSYNC2 (NETX_IRQ_VIC_START + 23) +#define NETX_IRQ_MSYNC3 (NETX_IRQ_VIC_START + 24) +#define NETX_IRQ_IRQ_PHY (NETX_IRQ_VIC_START + 25) +#define NETX_IRQ_ISO_AREA (NETX_IRQ_VIC_START + 26) /* int 27 is reserved */ /* int 28 is reserved */ -#define NETX_IRQ_TIMER3 29 -#define NETX_IRQ_TIMER4 30 +#define NETX_IRQ_TIMER3 (NETX_IRQ_VIC_START + 29) +#define NETX_IRQ_TIMER4 (NETX_IRQ_VIC_START + 30) /* int 31 is reserved */ 
-#define NETX_IRQS 32 +#define NETX_IRQS (NETX_IRQ_VIC_START + 32) /* for multiplexed irqs on gpio 0..14 */ #define NETX_IRQ_GPIO(x) (NETX_IRQS + (x)) From 78305c863074f809f09a96ea97b86d0f0c1c8399 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 15 Feb 2013 13:30:58 +0100 Subject: [PATCH 101/261] ARM: 7652/1: mm: fix missing use of 'asid' to get asid value from mm->context.id Fix missing use of the asid macro when getting the ASID from the mm->context.id field. Signed-off-by: Ben Dooks Signed-off-by: Russell King --- arch/arm/mm/proc-v7-3level.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S index 50bf1dafc9ea..6ffd78c0f9ab 100644 --- a/arch/arm/mm/proc-v7-3level.S +++ b/arch/arm/mm/proc-v7-3level.S @@ -48,7 +48,7 @@ ENTRY(cpu_v7_switch_mm) #ifdef CONFIG_MMU mmid r1, r1 @ get mm->context.id - and r3, r1, #0xff + asid r3, r1 mov r3, r3, lsl #(48 - 32) @ ASID mcrr p15, 0, r0, r3, c2 @ set TTB 0 isb From 904464b91eca8c665acea033489225af02eeb75a Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Fri, 22 Feb 2013 14:01:42 +0100 Subject: [PATCH 102/261] ARM: 7655/1: smp_twd: make twd_local_timer_of_register() no-op for nosmp When booting a SMP build kernel with nosmp on kernel cmdline, the following fat warning will be hit. ------------[ cut here ]------------ WARNING: at arch/arm/kernel/smp_twd.c:345 twd_local_timer_of_register+0x7c/0x90() twd_local_timer_of_register failed (-6) Modules linked in: Backtrace: [<80011f14>] (dump_backtrace+0x0/0x10c) from [<8044dd30>] (dump_stack+0x18/0x1c) r7:805e9f58 r6:805ba84c r5:80539331 r4:00000159 [<8044dd18>] (dump_stack+0x0/0x1c) from [<80020fbc>] (warn_slowpath_common+0x54/0x6c) [<80020f68>] (warn_slowpath_common+0x0/0x6c) from [<80021078>] (warn_slowpath_fmt+0x38/0x40) r9:412fc09a r8:8fffffff r7:ffffffff r6:00000001 r5:80633b8c r4:80b32da8 [<80021040>] (warn_slowpath_fmt+0x0/0x40) from [<805ba84] (twd_local_timer_of_register+0x7c/0x90) r3:fffffffa r2:8053934b [<805ba7d0>] (twd_local_timer_of_register+0x0/0x90) from [<805c0bec>] (imx6q_timer_init+0x18/0x4c) r5:80633800 r4:8053b701 [<805c0bd4>] (imx6q_timer_init+0x0/0x4c) from [<805ba4e8>] (time_init+0x28/0x38) r5:80633800 r4:805dc0f4 [<805ba4c0>] (time_init+0x0/0x38) from [<805b6854>] (start_kernel+0x1a0/0x310) [<805b66b4>] (start_kernel+0x0/0x310) from [<10008044>] (0x10008044) r8:1000406a r7:805f3f8c r6:805dc0c4 r5:805f0518 r4:10c5387d ---[ end trace 1b75b31a2719ed1c ]--- Check (!is_smp() || !setup_max_cpus) in twd_local_timer_of_register() to make it be a no-op for the conditions, thus avoid above warning. 
Reported-by: Dirk Behme Signed-off-by: Shawn Guo Signed-off-by: Russell King --- arch/arm/kernel/smp_twd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index c092115d903a..3f2565037480 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -373,6 +374,9 @@ void __init twd_local_timer_of_register(void) struct device_node *np; int err; + if (!is_smp() || !setup_max_cpus) + return; + np = of_find_matching_node(NULL, twd_of_match); if (!np) return; From d61947a164760ac520cb416768afdf38c33d60e7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 28 Feb 2013 17:46:16 +0100 Subject: [PATCH 103/261] ARM: 7657/1: head: fix swapper and idmap population with LPAE and big-endian The LPAE page table format uses 64-bit descriptors, so we need to take endianness into account when populating the swapper and idmap tables during early initialisation. This patch ensures that we store the two words making up each page table entry in the correct order when running big-endian. Cc: Acked-by: Catalin Marinas Tested-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/head.S | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 486a15ae9011..e0eb9a1cae77 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -184,13 +184,22 @@ __create_page_tables: orr r3, r3, #3 @ PGD block type mov r6, #4 @ PTRS_PER_PGD mov r7, #1 << (55 - 32) @ L_PGD_SWAPPER -1: str r3, [r0], #4 @ set bottom PGD entry bits +1: +#ifdef CONFIG_CPU_ENDIAN_BE8 str r7, [r0], #4 @ set top PGD entry bits + str r3, [r0], #4 @ set bottom PGD entry bits +#else + str r3, [r0], #4 @ set bottom PGD entry bits + str r7, [r0], #4 @ set top PGD entry bits +#endif add r3, r3, #0x1000 @ next PMD table subs r6, r6, #1 bne 1b add r4, r4, #0x1000 @ point to the PMD tables +#ifdef CONFIG_CPU_ENDIAN_BE8 + add r4, r4, #4 @ we only write the bottom word +#endif #endif ldr r7, [r10, #PROCINFO_MM_MMUFLAGS] @ mm_mmuflags @@ -258,6 +267,11 @@ __create_page_tables: addne r6, r6, #1 << SECTION_SHIFT strne r6, [r3] +#if defined(CONFIG_LPAE) && defined(CONFIG_CPU_ENDIAN_BE8) + sub r4, r4, #4 @ Fixup page table pointer + @ for 64-bit descriptors +#endif + #ifdef CONFIG_DEBUG_LL #if !defined(CONFIG_DEBUG_ICEDCC) && !defined(CONFIG_DEBUG_SEMIHOSTING) /* @@ -276,12 +290,16 @@ __create_page_tables: orr r3, r7, r3, lsl #SECTION_SHIFT #ifdef CONFIG_ARM_LPAE mov r7, #1 << (54 - 32) @ XN +#ifdef CONFIG_CPU_ENDIAN_BE8 + str r7, [r0], #4 + str r3, [r0], #4 +#else + str r3, [r0], #4 + str r7, [r0], #4 +#endif #else orr r3, r3, #PMD_SECT_XN -#endif str r3, [r0], #4 -#ifdef CONFIG_ARM_LPAE - str r7, [r0], #4 #endif #else /* CONFIG_DEBUG_ICEDCC || CONFIG_DEBUG_SEMIHOSTING */ From 37f47e3d62533c931b04cb409f2eb299e6342331 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 28 Feb 2013 17:47:20 +0100 Subject: [PATCH 104/261] ARM: 7658/1: mm: fix race updating mm->context.id on ASID rollover If a thread triggers an ASID rollover, other threads of the same process must be made to wait until the mm->context.id for the shared mm_struct has been updated to new generation and associated book-keeping (e.g. TLB invalidation) has ben performed. 
However, there is a *tiny* window where both mm->context.id and the relevant active_asids entry are updated to the new generation, but the TLB flush has not been performed, which could allow another thread to return to userspace with a dirty TLB, potentially leading to data corruption. In reality this will never occur because one CPU would need to perform a context-switch in the time it takes another to do a couple of atomic test/set operations but we should plug the race anyway. This patch moves the active_asids update until after the potential TLB flush on context-switch. Cc: # 3.8 Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/mm/context.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 7a0511191f6b..03ba181e359c 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -207,11 +207,11 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) if ((mm->context.id ^ atomic64_read(&asid_generation)) >> ASID_BITS) new_context(mm, cpu); - atomic64_set(&per_cpu(active_asids, cpu), mm->context.id); - cpumask_set_cpu(cpu, mm_cpumask(mm)); - if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) local_flush_tlb_all(); + + atomic64_set(&per_cpu(active_asids, cpu), mm->context.id); + cpumask_set_cpu(cpu, mm_cpumask(mm)); raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); switch_mm_fastpath: From 8a4e3a9ead7e37ce1505602b564c15da09ac039f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 28 Feb 2013 17:47:36 +0100 Subject: [PATCH 105/261] ARM: 7659/1: mm: make mm->context.id an atomic64_t variable mm->context.id is updated under asid_lock when a new ASID is allocated to an mm_struct. However, it is also read without the lock when a task is being scheduled and checking whether or not the current ASID generation is up-to-date. If two threads of the same process are being scheduled in parallel and the bottom bits of the generation in their mm->context.id match the current generation (that is, the mm_struct has not been used for ~2^24 rollovers) then the non-atomic, lockless access to mm->context.id may yield the incorrect ASID. This patch fixes this issue by making mm->context.id and atomic64_t, ensuring that the generation is always read consistently. For code that only requires access to the ASID bits (e.g. TLB flushing by mm), then the value is accessed directly, which GCC converts to an ldrb. Cc: # 3.8 Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/mmu.h | 8 ++++---- arch/arm/include/asm/mmu_context.h | 2 +- arch/arm/kernel/asm-offsets.c | 2 +- arch/arm/mm/context.c | 21 +++++++++++++-------- 4 files changed, 19 insertions(+), 14 deletions(-) diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index 9f77e7804f3b..e3d55547e755 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -5,15 +5,15 @@ typedef struct { #ifdef CONFIG_CPU_HAS_ASID - u64 id; + atomic64_t id; #endif - unsigned int vmalloc_seq; + unsigned int vmalloc_seq; } mm_context_t; #ifdef CONFIG_CPU_HAS_ASID #define ASID_BITS 8 #define ASID_MASK ((~0ULL) << ASID_BITS) -#define ASID(mm) ((mm)->context.id & ~ASID_MASK) +#define ASID(mm) ((mm)->context.id.counter & ~ASID_MASK) #else #define ASID(mm) (0) #endif @@ -26,7 +26,7 @@ typedef struct { * modified for 2.6 by Hyok S. 
Choi */ typedef struct { - unsigned long end_brk; + unsigned long end_brk; } mm_context_t; #endif diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index e1f644bc7cc5..863a6611323c 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -25,7 +25,7 @@ void __check_vmalloc_seq(struct mm_struct *mm); #ifdef CONFIG_CPU_HAS_ASID void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk); -#define init_new_context(tsk,mm) ({ mm->context.id = 0; }) +#define init_new_context(tsk,mm) ({ atomic64_set(&mm->context.id, 0); 0; }) #else /* !CONFIG_CPU_HAS_ASID */ diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 5ce738b43508..923eec7105cf 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -110,7 +110,7 @@ int main(void) BLANK(); #endif #ifdef CONFIG_CPU_HAS_ASID - DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id)); + DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter)); BLANK(); #endif DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm)); diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 03ba181e359c..44d4ee52f3e2 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -152,9 +152,9 @@ static int is_reserved_asid(u64 asid) return 0; } -static void new_context(struct mm_struct *mm, unsigned int cpu) +static u64 new_context(struct mm_struct *mm, unsigned int cpu) { - u64 asid = mm->context.id; + u64 asid = atomic64_read(&mm->context.id); u64 generation = atomic64_read(&asid_generation); if (asid != 0 && is_reserved_asid(asid)) { @@ -181,13 +181,14 @@ static void new_context(struct mm_struct *mm, unsigned int cpu) cpumask_clear(mm_cpumask(mm)); } - mm->context.id = asid; + return asid; } void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) { unsigned long flags; unsigned int cpu = smp_processor_id(); + u64 asid; if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq)) __check_vmalloc_seq(mm); @@ -198,19 +199,23 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) */ cpu_set_reserved_ttbr0(); - if (!((mm->context.id ^ atomic64_read(&asid_generation)) >> ASID_BITS) - && atomic64_xchg(&per_cpu(active_asids, cpu), mm->context.id)) + asid = atomic64_read(&mm->context.id); + if (!((asid ^ atomic64_read(&asid_generation)) >> ASID_BITS) + && atomic64_xchg(&per_cpu(active_asids, cpu), asid)) goto switch_mm_fastpath; raw_spin_lock_irqsave(&cpu_asid_lock, flags); /* Check that our ASID belongs to the current generation. */ - if ((mm->context.id ^ atomic64_read(&asid_generation)) >> ASID_BITS) - new_context(mm, cpu); + asid = atomic64_read(&mm->context.id); + if ((asid ^ atomic64_read(&asid_generation)) >> ASID_BITS) { + asid = new_context(mm, cpu); + atomic64_set(&mm->context.id, asid); + } if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) local_flush_tlb_all(); - atomic64_set(&per_cpu(active_asids, cpu), mm->context.id); + atomic64_set(&per_cpu(active_asids, cpu), asid); cpumask_set_cpu(cpu, mm_cpumask(mm)); raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); From 862c588f062fe9339a180cf6429e4df1855c376a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 28 Feb 2013 17:48:11 +0100 Subject: [PATCH 106/261] ARM: 7660/1: tlb: add branch predictor maintenance operations The ARM architecture requires explicit branch predictor maintenance when updating an instruction stream for a given virtual address. 
In reality, this isn't so much of a burden because the branch predictor is flushed during the cache maintenance required to make the new instructions visible to the I-side of the processor. However, there are still some cases where explicit flushing is required, so add a local_bp_flush_all operation to deal with this. Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/tlbflush.h | 34 +++++++++++++++++++++++++++------ arch/arm/kernel/smp_tlb.c | 12 ++++++++++++ 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index 6e924d3a77eb..4db8c8820f0d 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -34,10 +34,13 @@ #define TLB_V6_D_ASID (1 << 17) #define TLB_V6_I_ASID (1 << 18) +#define TLB_V6_BP (1 << 19) + /* Unified Inner Shareable TLB operations (ARMv7 MP extensions) */ -#define TLB_V7_UIS_PAGE (1 << 19) -#define TLB_V7_UIS_FULL (1 << 20) -#define TLB_V7_UIS_ASID (1 << 21) +#define TLB_V7_UIS_PAGE (1 << 20) +#define TLB_V7_UIS_FULL (1 << 21) +#define TLB_V7_UIS_ASID (1 << 22) +#define TLB_V7_UIS_BP (1 << 23) #define TLB_BARRIER (1 << 28) #define TLB_L2CLEAN_FR (1 << 29) /* Feroceon */ @@ -150,7 +153,8 @@ #define v6wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BARRIER | \ TLB_V6_I_FULL | TLB_V6_D_FULL | \ TLB_V6_I_PAGE | TLB_V6_D_PAGE | \ - TLB_V6_I_ASID | TLB_V6_D_ASID) + TLB_V6_I_ASID | TLB_V6_D_ASID | \ + TLB_V6_BP) #ifdef CONFIG_CPU_TLB_V6 # define v6wbi_possible_flags v6wbi_tlb_flags @@ -166,9 +170,11 @@ #endif #define v7wbi_tlb_flags_smp (TLB_WB | TLB_DCLEAN | TLB_BARRIER | \ - TLB_V7_UIS_FULL | TLB_V7_UIS_PAGE | TLB_V7_UIS_ASID) + TLB_V7_UIS_FULL | TLB_V7_UIS_PAGE | \ + TLB_V7_UIS_ASID | TLB_V7_UIS_BP) #define v7wbi_tlb_flags_up (TLB_WB | TLB_DCLEAN | TLB_BARRIER | \ - TLB_V6_U_FULL | TLB_V6_U_PAGE | TLB_V6_U_ASID) + TLB_V6_U_FULL | TLB_V6_U_PAGE | \ + TLB_V6_U_ASID | TLB_V6_BP) #ifdef CONFIG_CPU_TLB_V7 @@ -430,6 +436,20 @@ static inline void local_flush_tlb_kernel_page(unsigned long kaddr) } } +static inline void local_flush_bp_all(void) +{ + const int zero = 0; + const unsigned int __tlb_flag = __cpu_tlb_flags; + + if (tlb_flag(TLB_V7_UIS_BP)) + asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero)); + else if (tlb_flag(TLB_V6_BP)) + asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero)); + + if (tlb_flag(TLB_BARRIER)) + isb(); +} + /* * flush_pmd_entry * @@ -480,6 +500,7 @@ static inline void clean_pmd_entry(void *pmd) #define flush_tlb_kernel_page local_flush_tlb_kernel_page #define flush_tlb_range local_flush_tlb_range #define flush_tlb_kernel_range local_flush_tlb_kernel_range +#define flush_bp_all local_flush_bp_all #else extern void flush_tlb_all(void); extern void flush_tlb_mm(struct mm_struct *mm); @@ -487,6 +508,7 @@ extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr); extern void flush_tlb_kernel_page(unsigned long kaddr); extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); +extern void flush_bp_all(void); #endif /* diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c index 02c5d2ce23bf..bd0300531399 100644 --- a/arch/arm/kernel/smp_tlb.c +++ b/arch/arm/kernel/smp_tlb.c @@ -64,6 +64,11 @@ static inline void ipi_flush_tlb_kernel_range(void *arg) local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end); } +static inline void ipi_flush_bp_all(void *ignored) +{ + 
local_flush_bp_all(); +} + void flush_tlb_all(void) { if (tlb_ops_need_broadcast()) @@ -127,3 +132,10 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) local_flush_tlb_kernel_range(start, end); } +void flush_bp_all(void) +{ + if (tlb_ops_need_broadcast()) + on_each_cpu(ipi_flush_bp_all, NULL, 1); + else + local_flush_bp_all(); +} From 89c7e4b8bbb3d4fa52df5746a8ad38e610143651 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 28 Feb 2013 17:48:40 +0100 Subject: [PATCH 107/261] ARM: 7661/1: mm: perform explicit branch predictor maintenance when required The ARM ARM requires branch predictor maintenance if, for a given ASID, the instructions at a specific virtual address appear to change. From the kernel's point of view, that means: - Changing the kernel's view of memory (e.g. switching to the identity map) - ASID rollover (since ASIDs will be re-allocated to new tasks) This patch adds explicit branch predictor maintenance when either of the two conditions above are met. Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 1 + arch/arm/kernel/suspend.c | 1 + arch/arm/mm/context.c | 4 +++- arch/arm/mm/idmap.c | 1 + 4 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 1bdfd87c8e41..31644f1978d5 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -285,6 +285,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void) * switch away from it before attempting any exclusive accesses. */ cpu_switch_mm(mm->pgd, mm); + local_flush_bp_all(); enter_lazy_tlb(mm, current); local_flush_tlb_all(); diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c index 358bca3a995e..c59c97ea8268 100644 --- a/arch/arm/kernel/suspend.c +++ b/arch/arm/kernel/suspend.c @@ -68,6 +68,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) ret = __cpu_suspend(arg, fn); if (ret == 0) { cpu_switch_mm(mm->pgd, mm); + local_flush_bp_all(); local_flush_tlb_all(); } diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 44d4ee52f3e2..a5a4b2bc42ba 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -212,8 +212,10 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) atomic64_set(&mm->context.id, asid); } - if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) + if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) { + local_flush_bp_all(); local_flush_tlb_all(); + } atomic64_set(&per_cpu(active_asids, cpu), asid); cpumask_set_cpu(cpu, mm_cpumask(mm)); diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index 2dffc010cc41..5ee505c937d1 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c @@ -141,6 +141,7 @@ void setup_mm_for_reboot(void) { /* Switch to the identity mapping. */ cpu_switch_mm(idmap_pgd, &init_mm); + local_flush_bp_all(); #ifdef CONFIG_CPU_HAS_ASID /* From 1a8e611874da714ee7ef1e92e5160b38dc54959b Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Thu, 28 Feb 2013 17:48:57 +0100 Subject: [PATCH 108/261] ARM: 7662/1: hw_breakpoint: reset debug logic on secondary CPUs in s2ram resume We must mask out the CPU_TASKS_FROZEN bit so that reset_ctrl_regs is also called on a secondary CPU during s2ram resume, where only the boot CPU will receive the PM_EXIT notification. 
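For background (not part of the patch; example_cpu_notify() is a made-up callback): CPU hotplug notifier actions have CPU_TASKS_FROZEN OR'd in when the hotplug transition happens as part of suspend or resume, so an exact comparison against CPU_ONLINE never matches CPU_ONLINE_FROZEN. Masking the bit first handles both paths:

#include <linux/cpu.h>
#include <linux/notifier.h>

static int example_cpu_notify(struct notifier_block *self,
			      unsigned long action, void *hcpu)
{
	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_ONLINE:
		/* Reached for CPU_ONLINE and CPU_ONLINE_FROZEN (s2ram resume). */
		break;
	}
	return NOTIFY_OK;
}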
Signed-off-by: Dietmar Eggemann Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/hw_breakpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 5eae53e7a2e1..96093b75ab90 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -1023,7 +1023,7 @@ out_mdbgen: static int __cpuinit dbg_reset_notify(struct notifier_block *self, unsigned long action, void *cpu) { - if (action == CPU_ONLINE) + if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) smp_call_function_single((int)cpu, reset_ctrl_regs, NULL, 1); return NOTIFY_OK; From f2fe09b055e2549de41fb107b34c60bac4a1b0cf Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 28 Feb 2013 17:49:11 +0100 Subject: [PATCH 109/261] ARM: 7663/1: perf: fix ARMv7 EVTYPE_MASK to include NSH bit Masked out PMXEVTYPER.NSH means that we can't enable profiling at PL2, regardless of the settings in the HDCR. This patch fixes the broken mask. Cc: Reported-by: Christoffer Dall Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/perf_event_v7.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 8c79a9e70b83..039cffb053a7 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -774,7 +774,7 @@ static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] /* * PMXEVTYPER: Event selection reg */ -#define ARMV7_EVTYPE_MASK 0xc00000ff /* Mask for writable bits */ +#define ARMV7_EVTYPE_MASK 0xc80000ff /* Mask for writable bits */ #define ARMV7_EVTYPE_EVENT 0xff /* Mask for EVENT bits */ /* From e595ede6050b1ce982d74f7084f93715bcc32359 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Thu, 28 Feb 2013 17:51:29 +0100 Subject: [PATCH 110/261] ARM: 7664/1: perf: remove erroneous semicolon from event initialisation Commit 9dcbf466559f ("ARM: perf: simplify __hw_perf_event_init err handling") tidied up the error handling code for perf event initialisation on ARM, but a copy-and-paste error left a dangling semicolon at the end of an if statement. This patch removes the broken semicolon, restoring the old group validation semantics. Cc: Mark Rutland Acked-by: Dirk Behme Signed-off-by: Chen Gang Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 31e0eb353cd8..a89206799db8 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -400,7 +400,7 @@ __hw_perf_event_init(struct perf_event *event) } if (event->group_leader != event) { - if (validate_group(event) != 0); + if (validate_group(event) != 0) return -EINVAL; } From 3f7d1fe108dbaefd0c57a41753fc2c90b395f458 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 28 Feb 2013 21:53:35 +0100 Subject: [PATCH 111/261] ARM: 7665/1: Wire up kcmp syscall Wire up kcmp syscall for ability to proceed checkpoint/restore procedure on ARM platform. 
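As background (not part of the patch): glibc provides no kcmp() wrapper, so checkpoint/restore tools invoke it through syscall(2). A minimal userspace sketch, assuming installed kernel headers that define __NR_kcmp and KCMP_FILE:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/kcmp.h>

int main(void)
{
	pid_t pid = getpid();

	/* Returns 0 if fd 0 and fd 1 of this process refer to the same open file. */
	long ret = syscall(__NR_kcmp, pid, pid, KCMP_FILE, 0, 1);

	printf("kcmp(KCMP_FILE, 0, 1) = %ld\n", ret);
	return 0;
}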
Signed-off-by: Alexander Kartashov Signed-off-by: Cyrill Gorcunov Acked-by: Arnd Bergmann Signed-off-by: Russell King --- arch/arm/include/uapi/asm/unistd.h | 2 +- arch/arm/kernel/calls.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h index 4da7cde70b5d..af33b44990ed 100644 --- a/arch/arm/include/uapi/asm/unistd.h +++ b/arch/arm/include/uapi/asm/unistd.h @@ -404,7 +404,7 @@ #define __NR_setns (__NR_SYSCALL_BASE+375) #define __NR_process_vm_readv (__NR_SYSCALL_BASE+376) #define __NR_process_vm_writev (__NR_SYSCALL_BASE+377) - /* 378 for kcmp */ +#define __NR_kcmp (__NR_SYSCALL_BASE+378) #define __NR_finit_module (__NR_SYSCALL_BASE+379) /* diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index 0cc57611fc4f..c6ca7e376773 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -387,7 +387,7 @@ /* 375 */ CALL(sys_setns) CALL(sys_process_vm_readv) CALL(sys_process_vm_writev) - CALL(sys_ni_syscall) /* reserved for sys_kcmp */ + CALL(sys_kcmp) CALL(sys_finit_module) #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls From 65b5f42e2a9eb9c8383fb67698bf8c27657f8c14 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 20 Feb 2013 16:47:44 +1000 Subject: [PATCH 112/261] drm/nve0/graph: some random reg moved on kepler Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/core/engine/graph/nve0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c b/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c index 61cec0f6ff1c..4857f913efdd 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c +++ b/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c @@ -350,7 +350,7 @@ nve0_graph_init_gpc_0(struct nvc0_graph_priv *priv) nv_wr32(priv, GPC_UNIT(gpc, 0x0918), magicgpc918); } - nv_wr32(priv, GPC_BCAST(0x1bd4), magicgpc918); + nv_wr32(priv, GPC_BCAST(0x3fd4), magicgpc918); nv_wr32(priv, GPC_BCAST(0x08ac), nv_rd32(priv, 0x100800)); } From 650e1203c11354ba84d69ba445abc0efcfe3890a Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Tue, 26 Feb 2013 02:33:11 +0100 Subject: [PATCH 113/261] drm/nouveau: Disable AGP on PowerPC again. Signed-off-by: Francisco Jerez Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_agp.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_agp.c b/drivers/gpu/drm/nouveau/nouveau_agp.c index d28430cd2ba6..6e7a55f93a85 100644 --- a/drivers/gpu/drm/nouveau/nouveau_agp.c +++ b/drivers/gpu/drm/nouveau/nouveau_agp.c @@ -47,6 +47,18 @@ nouveau_agp_enabled(struct nouveau_drm *drm) if (drm->agp.stat == UNKNOWN) { if (!nouveau_agpmode) return false; +#ifdef __powerpc__ + /* Disable AGP by default on all PowerPC machines for + * now -- At least some UniNorth-2 AGP bridges are + * known to be broken: DMA from the host to the card + * works just fine, but writeback from the card to the + * host goes straight to memory untranslated bypassing + * the GATT somehow, making them quite painful to deal + * with... + */ + if (nouveau_agpmode == -1) + return false; +#endif return true; } From f6853faa85793bf23b46787e4039824d275453c2 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Tue, 26 Feb 2013 02:33:12 +0100 Subject: [PATCH 114/261] drm/nouveau: Fix typo in init_idx_addr_latched(). Fixes script-based modesetting on some LVDS panels. 
Signed-off-by: Francisco Jerez Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/core/subdev/bios/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/init.c b/drivers/gpu/drm/nouveau/core/subdev/bios/init.c index 2cc1e6a5eb6a..9c41b58d57e2 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/bios/init.c +++ b/drivers/gpu/drm/nouveau/core/subdev/bios/init.c @@ -869,7 +869,7 @@ init_idx_addr_latched(struct nvbios_init *init) init->offset += 2; init_wr32(init, dreg, idata); - init_mask(init, creg, ~mask, data | idata); + init_mask(init, creg, ~mask, data | iaddr); } } From 67f9718b084ea7100cefa39b02863fcb14102f8c Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 26 Feb 2013 12:02:54 +1000 Subject: [PATCH 115/261] drm/nv84: fix regression in page flipping Need to emit the semaphore ctxdma before trying to use the semaphore operations. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nv50_display.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index a6237c9cbbc3..e26caf63db0c 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -512,11 +512,11 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, /* synchronise with the rendering channel, if necessary */ if (likely(chan)) { - ret = RING_SPACE(chan, 10); - if (ret) - return ret; - if (nv_mclass(chan->object) < NV84_CHANNEL_IND_CLASS) { + ret = RING_SPACE(chan, 8); + if (ret) + return ret; + BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 2); OUT_RING (chan, NvEvoSema0 + nv_crtc->index); OUT_RING (chan, sync->sem.offset); @@ -525,13 +525,17 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, BEGIN_NV04(chan, 0, NV11_SUBCHAN_SEMAPHORE_OFFSET, 2); OUT_RING (chan, sync->sem.offset ^ 0x10); OUT_RING (chan, 0x74b1e000); - BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1); - OUT_RING (chan, NvSema); } else if (nv_mclass(chan->object) < NVC0_CHANNEL_IND_CLASS) { u64 offset = nv84_fence_crtc(chan, nv_crtc->index); offset += sync->sem.offset; + ret = RING_SPACE(chan, 12); + if (ret) + return ret; + + BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1); + OUT_RING (chan, chan->vram); BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4); OUT_RING (chan, upper_32_bits(offset)); OUT_RING (chan, lower_32_bits(offset)); @@ -546,6 +550,10 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, u64 offset = nv84_fence_crtc(chan, nv_crtc->index); offset += sync->sem.offset; + ret = RING_SPACE(chan, 10); + if (ret) + return ret; + BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4); OUT_RING (chan, upper_32_bits(offset)); OUT_RING (chan, lower_32_bits(offset)); From 42bed34c364786b3757f9d788d8ed39120e8f1b5 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 27 Feb 2013 09:52:47 +1000 Subject: [PATCH 116/261] drm/nouveau/i2c: drop parent refcount when creating ports Fixes issue where i2c subdev never gets destroyed due to its subobjects holding references. This will mean the i2c subdev refcount goes negative during its destruction, but this isn't an issue in practice. 
Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/core/subdev/i2c/base.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/nouveau/core/subdev/i2c/base.c b/drivers/gpu/drm/nouveau/core/subdev/i2c/base.c index a114a0ed7e98..2e98e8a3f1aa 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/i2c/base.c +++ b/drivers/gpu/drm/nouveau/core/subdev/i2c/base.c @@ -142,6 +142,7 @@ nouveau_i2c_port_create_(struct nouveau_object *parent, /* drop port's i2c subdev refcount, i2c handles this itself */ if (ret == 0) { list_add_tail(&port->head, &i2c->ports); + atomic_dec(&parent->refcount); atomic_dec(&engine->refcount); } From 9f9bdaaf07dee47f73a160e6e4c64f67ee26c1d7 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 2 Mar 2013 13:21:31 +1000 Subject: [PATCH 117/261] drm/nv50-: prevent some races between modesetting and page flipping nexuiz-glx + gnome-shell is able to trigger this a lot of the time. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nv50_display.c | 183 ++++++++++++++----------- 1 file changed, 103 insertions(+), 80 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index e26caf63db0c..87a5a56ed358 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -55,9 +55,9 @@ /* offsets in shared sync bo of various structures */ #define EVO_SYNC(c, o) ((c) * 0x0100 + (o)) -#define EVO_MAST_NTFY EVO_SYNC( 0, 0x00) -#define EVO_FLIP_SEM0(c) EVO_SYNC((c), 0x00) -#define EVO_FLIP_SEM1(c) EVO_SYNC((c), 0x10) +#define EVO_MAST_NTFY EVO_SYNC( 0, 0x00) +#define EVO_FLIP_SEM0(c) EVO_SYNC((c) + 1, 0x00) +#define EVO_FLIP_SEM1(c) EVO_SYNC((c) + 1, 0x10) #define EVO_CORE_HANDLE (0xd1500000) #define EVO_CHAN_HANDLE(t,i) (0xd15c0000 | (((t) & 0x00ff) << 8) | (i)) @@ -341,10 +341,8 @@ struct nv50_curs { struct nv50_sync { struct nv50_dmac base; - struct { - u32 offset; - u16 value; - } sem; + u32 addr; + u32 data; }; struct nv50_ovly { @@ -471,13 +469,33 @@ nv50_display_crtc_sema(struct drm_device *dev, int crtc) return nv50_disp(dev)->sync; } +struct nv50_display_flip { + struct nv50_disp *disp; + struct nv50_sync *chan; +}; + +static bool +nv50_display_flip_wait(void *data) +{ + struct nv50_display_flip *flip = data; + if (nouveau_bo_rd32(flip->disp->sync, flip->chan->addr / 4) == + flip->chan->data) + return true; + usleep_range(1, 2); + return false; +} + void nv50_display_flip_stop(struct drm_crtc *crtc) { - struct nv50_sync *sync = nv50_sync(crtc); + struct nouveau_device *device = nouveau_dev(crtc->dev); + struct nv50_display_flip flip = { + .disp = nv50_disp(crtc->dev), + .chan = nv50_sync(crtc), + }; u32 *push; - push = evo_wait(sync, 8); + push = evo_wait(flip.chan, 8); if (push) { evo_mthd(push, 0x0084, 1); evo_data(push, 0x00000000); @@ -487,8 +505,10 @@ nv50_display_flip_stop(struct drm_crtc *crtc) evo_data(push, 0x00000000); evo_mthd(push, 0x0080, 1); evo_data(push, 0x00000000); - evo_kick(push, sync); + evo_kick(push, flip.chan); } + + nv_wait_cb(device, nv50_display_flip_wait, &flip); } int @@ -496,11 +516,10 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct nouveau_framebuffer *nv_fb = nouveau_framebuffer(fb); - struct nv50_disp *disp = nv50_disp(crtc->dev); struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); struct nv50_sync *sync = nv50_sync(crtc); + int head = nv_crtc->index, ret; u32 *push; - int ret; swap_interval <<= 4; if (swap_interval == 0) @@ -510,66 +529,64 @@ 
nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, if (unlikely(push == NULL)) return -EBUSY; - /* synchronise with the rendering channel, if necessary */ - if (likely(chan)) { - if (nv_mclass(chan->object) < NV84_CHANNEL_IND_CLASS) { - ret = RING_SPACE(chan, 8); - if (ret) - return ret; + if (chan && nv_mclass(chan->object) < NV84_CHANNEL_IND_CLASS) { + ret = RING_SPACE(chan, 8); + if (ret) + return ret; - BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 2); - OUT_RING (chan, NvEvoSema0 + nv_crtc->index); - OUT_RING (chan, sync->sem.offset); - BEGIN_NV04(chan, 0, NV11_SUBCHAN_SEMAPHORE_RELEASE, 1); - OUT_RING (chan, 0xf00d0000 | sync->sem.value); - BEGIN_NV04(chan, 0, NV11_SUBCHAN_SEMAPHORE_OFFSET, 2); - OUT_RING (chan, sync->sem.offset ^ 0x10); - OUT_RING (chan, 0x74b1e000); - } else - if (nv_mclass(chan->object) < NVC0_CHANNEL_IND_CLASS) { - u64 offset = nv84_fence_crtc(chan, nv_crtc->index); - offset += sync->sem.offset; + BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 2); + OUT_RING (chan, NvEvoSema0 + head); + OUT_RING (chan, sync->addr ^ 0x10); + BEGIN_NV04(chan, 0, NV11_SUBCHAN_SEMAPHORE_RELEASE, 1); + OUT_RING (chan, sync->data + 1); + BEGIN_NV04(chan, 0, NV11_SUBCHAN_SEMAPHORE_OFFSET, 2); + OUT_RING (chan, sync->addr); + OUT_RING (chan, sync->data); + } else + if (chan && nv_mclass(chan->object) < NVC0_CHANNEL_IND_CLASS) { + u64 addr = nv84_fence_crtc(chan, head) + sync->addr; + ret = RING_SPACE(chan, 12); + if (ret) + return ret; - ret = RING_SPACE(chan, 12); - if (ret) - return ret; + BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1); + OUT_RING (chan, chan->vram); + BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4); + OUT_RING (chan, upper_32_bits(addr ^ 0x10)); + OUT_RING (chan, lower_32_bits(addr ^ 0x10)); + OUT_RING (chan, sync->data + 1); + OUT_RING (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG); + BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4); + OUT_RING (chan, upper_32_bits(addr)); + OUT_RING (chan, lower_32_bits(addr)); + OUT_RING (chan, sync->data); + OUT_RING (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL); + } else + if (chan) { + u64 addr = nv84_fence_crtc(chan, head) + sync->addr; + ret = RING_SPACE(chan, 10); + if (ret) + return ret; - BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1); - OUT_RING (chan, chan->vram); - BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4); - OUT_RING (chan, upper_32_bits(offset)); - OUT_RING (chan, lower_32_bits(offset)); - OUT_RING (chan, 0xf00d0000 | sync->sem.value); - OUT_RING (chan, 0x00000002); - BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4); - OUT_RING (chan, upper_32_bits(offset)); - OUT_RING (chan, lower_32_bits(offset ^ 0x10)); - OUT_RING (chan, 0x74b1e000); - OUT_RING (chan, 0x00000001); - } else { - u64 offset = nv84_fence_crtc(chan, nv_crtc->index); - offset += sync->sem.offset; - - ret = RING_SPACE(chan, 10); - if (ret) - return ret; - - BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4); - OUT_RING (chan, upper_32_bits(offset)); - OUT_RING (chan, lower_32_bits(offset)); - OUT_RING (chan, 0xf00d0000 | sync->sem.value); - OUT_RING (chan, 0x00001002); - BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4); - OUT_RING (chan, upper_32_bits(offset)); - OUT_RING (chan, lower_32_bits(offset ^ 0x10)); - OUT_RING (chan, 0x74b1e000); - OUT_RING (chan, 0x00001001); - } + BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4); + OUT_RING (chan, upper_32_bits(addr ^ 0x10)); + OUT_RING (chan, lower_32_bits(addr ^ 0x10)); + OUT_RING 
(chan, sync->data + 1); + OUT_RING (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG | + NVC0_SUBCHAN_SEMAPHORE_TRIGGER_YIELD); + BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4); + OUT_RING (chan, upper_32_bits(addr)); + OUT_RING (chan, lower_32_bits(addr)); + OUT_RING (chan, sync->data); + OUT_RING (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL | + NVC0_SUBCHAN_SEMAPHORE_TRIGGER_YIELD); + } + if (chan) { + sync->addr ^= 0x10; + sync->data++; FIRE_RING (chan); } else { - nouveau_bo_wr32(disp->sync, sync->sem.offset / 4, - 0xf00d0000 | sync->sem.value); evo_sync(crtc->dev); } @@ -583,9 +600,9 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, evo_data(push, 0x40000000); } evo_mthd(push, 0x0088, 4); - evo_data(push, sync->sem.offset); - evo_data(push, 0xf00d0000 | sync->sem.value); - evo_data(push, 0x74b1e000); + evo_data(push, sync->addr); + evo_data(push, sync->data++); + evo_data(push, sync->data); evo_data(push, NvEvoSync); evo_mthd(push, 0x00a0, 2); evo_data(push, 0x00000000); @@ -613,9 +630,6 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, evo_mthd(push, 0x0080, 1); evo_data(push, 0x00000000); evo_kick(push, sync); - - sync->sem.offset ^= 0x10; - sync->sem.value++; return 0; } @@ -1387,7 +1401,8 @@ nv50_crtc_create(struct drm_device *dev, struct nouveau_object *core, int index) if (ret) goto out; - head->sync.sem.offset = EVO_SYNC(1 + index, 0x00); + head->sync.addr = EVO_FLIP_SEM0(index); + head->sync.data = 0x00000000; /* allocate overlay resources */ ret = nv50_pioc_create(disp->core, NV50_DISP_OIMM_CLASS, index, @@ -2120,15 +2135,23 @@ nv50_display_fini(struct drm_device *dev) int nv50_display_init(struct drm_device *dev) { - u32 *push = evo_wait(nv50_mast(dev), 32); - if (push) { - evo_mthd(push, 0x0088, 1); - evo_data(push, NvEvoSync); - evo_kick(push, nv50_mast(dev)); - return 0; + struct nv50_disp *disp = nv50_disp(dev); + struct drm_crtc *crtc; + u32 *push; + + push = evo_wait(nv50_mast(dev), 32); + if (!push) + return -EBUSY; + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct nv50_sync *sync = nv50_sync(crtc); + nouveau_bo_wr32(disp->sync, sync->addr / 4, sync->data); } - return -EBUSY; + evo_mthd(push, 0x0088, 1); + evo_data(push, NvEvoSync); + evo_kick(push, nv50_mast(dev)); + return 0; } void From ba0e3427b03c3d1550239779eca5c1c5a53a2152 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 2 Mar 2013 19:14:03 -0800 Subject: [PATCH 118/261] userns: Stop oopsing in key_change_session_keyring Dave Jones writes: > Just hit this on Linus' current tree. 
> > [ 89.621770] BUG: unable to handle kernel NULL pointer dereference at 00000000000000c8 > [ 89.623111] IP: [] commit_creds+0x250/0x2f0 > [ 89.624062] PGD 122bfd067 PUD 122bfe067 PMD 0 > [ 89.624901] Oops: 0000 [#1] PREEMPT SMP > [ 89.625678] Modules linked in: caif_socket caif netrom bridge hidp 8021q garp stp mrp rose llc2 af_rxrpc phonet af_key binfmt_misc bnep l2tp_ppp can_bcm l2tp_core pppoe pppox can_raw scsi_transport_iscsi ppp_generic slhc nfnetlink can ipt_ULOG ax25 decnet irda nfc rds x25 crc_ccitt appletalk atm ipx p8023 psnap p8022 llc lockd sunrpc ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 xt_conntrack nf_conntrack ip6table_filter ip6_tables btusb bluetooth snd_hda_codec_realtek snd_hda_intel snd_hda_codec snd_pcm vhost_net snd_page_alloc snd_timer tun macvtap usb_debug snd rfkill microcode macvlan edac_core pcspkr serio_raw kvm_amd soundcore kvm r8169 mii > [ 89.637846] CPU 2 > [ 89.638175] Pid: 782, comm: trinity-main Not tainted 3.8.0+ #63 Gigabyte Technology Co., Ltd. GA-MA78GM-S2H/GA-MA78GM-S2H > [ 89.639850] RIP: 0010:[] [] commit_creds+0x250/0x2f0 > [ 89.641161] RSP: 0018:ffff880115657eb8 EFLAGS: 00010207 > [ 89.641984] RAX: 00000000000003e8 RBX: ffff88012688b000 RCX: 0000000000000000 > [ 89.643069] RDX: 0000000000000000 RSI: ffffffff81c32960 RDI: ffff880105839600 > [ 89.644167] RBP: ffff880115657ed8 R08: 0000000000000000 R09: 0000000000000000 > [ 89.645254] R10: 0000000000000001 R11: 0000000000000246 R12: ffff880105839600 > [ 89.646340] R13: ffff88011beea490 R14: ffff88011beea490 R15: 0000000000000000 > [ 89.647431] FS: 00007f3ac063b740(0000) GS:ffff88012b200000(0000) knlGS:0000000000000000 > [ 89.648660] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > [ 89.649548] CR2: 00000000000000c8 CR3: 0000000122bfc000 CR4: 00000000000007e0 > [ 89.650635] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > [ 89.651723] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 > [ 89.652812] Process trinity-main (pid: 782, threadinfo ffff880115656000, task ffff88011beea490) > [ 89.654128] Stack: > [ 89.654433] 0000000000000000 ffff8801058396a0 ffff880105839600 ffff88011beeaa78 > [ 89.655769] ffff880115657ef8 ffffffff812c7d9b ffffffff82079be0 0000000000000000 > [ 89.657073] ffff880115657f28 ffffffff8106c665 0000000000000002 ffff880115657f58 > [ 89.658399] Call Trace: > [ 89.658822] [] key_change_session_keyring+0xfb/0x140 > [ 89.659845] [] task_work_run+0xa5/0xd0 > [ 89.660698] [] do_notify_resume+0x71/0xb0 > [ 89.661581] [] int_signal+0x12/0x17 > [ 89.662385] Code: 24 90 00 00 00 48 8b b3 90 00 00 00 49 8b 4c 24 40 48 39 f2 75 08 e9 83 00 00 00 48 89 ca 48 81 fa 60 29 c3 81 0f 84 41 fe ff ff <48> 8b 8a c8 00 00 00 48 39 ce 75 e4 3b 82 d0 00 00 00 0f 84 4b > [ 89.667778] RIP [] commit_creds+0x250/0x2f0 > [ 89.668733] RSP > [ 89.669301] CR2: 00000000000000c8 > > My fastest trinity induced oops yet! > > > Appears to be.. > > if ((set_ns == subset_ns->parent) && > 850: 48 8b 8a c8 00 00 00 mov 0xc8(%rdx),%rcx > > from the inlined cred_cap_issubset By historical accident we have been reading trying to set new->user_ns from new->user_ns. Which is totally silly as new->user_ns is NULL (as is every other field in new except session_keyring at that point). The intent is clearly to copy all of the fields from old to new so copy old->user_ns into into new->user_ns. Cc: stable@vger.kernel.org Reported-by: Dave Jones Tested-by: Dave Jones Acked-by: Serge Hallyn Signed-off-by: "Eric W. 
Biederman" --- security/keys/process_keys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 58dfe0890947..a571fad91010 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -839,7 +839,7 @@ void key_change_session_keyring(struct callback_head *twork) new-> sgid = old-> sgid; new->fsgid = old->fsgid; new->user = get_uid(old->user); - new->user_ns = get_user_ns(new->user_ns); + new->user_ns = get_user_ns(old->user_ns); new->group_info = get_group_info(old->group_info); new->securebits = old->securebits; From 7f78e0351394052e1a6293e175825eb5c7869507 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 2 Mar 2013 19:39:14 -0800 Subject: [PATCH 119/261] fs: Limit sys_mount to only request filesystem modules. Modify the request_module to prefix the file system type with "fs-" and add aliases to all of the filesystems that can be built as modules to match. A common practice is to build all of the kernel code and leave code that is not commonly needed as modules, with the result that many users are exposed to any bug anywhere in the kernel. Looking for filesystems with a fs- prefix limits the pool of possible modules that can be loaded by mount to just filesystems trivially making things safer with no real cost. Using aliases means user space can control the policy of which filesystem modules are auto-loaded by editing /etc/modprobe.d/*.conf with blacklist and alias directives. Allowing simple, safe, well understood work-arounds to known problematic software. This also addresses a rare but unfortunate problem where the filesystem name is not the same as it's module name and module auto-loading would not work. While writing this patch I saw a handful of such cases. The most significant being autofs that lives in the module autofs4. This is relevant to user namespaces because we can reach the request module in get_fs_type() without having any special permissions, and people get uncomfortable when a user specified string (in this case the filesystem type) goes all of the way to request_module. After having looked at this issue I don't think there is any particular reason to perform any filtering or permission checks beyond making it clear in the module request that we want a filesystem module. The common pattern in the kernel is to call request_module() without regards to the users permissions. In general all a filesystem module does once loaded is call register_filesystem() and go to sleep. Which means there is not much attack surface exposed by loading a filesytem module unless the filesystem is mounted. In a user namespace filesystems are not mounted unless .fs_flags = FS_USERNS_MOUNT, which most filesystems do not set today. Acked-by: Serge Hallyn Acked-by: Kees Cook Reported-by: Kees Cook Signed-off-by: "Eric W. 
Biederman" --- arch/ia64/kernel/perfmon.c | 1 + arch/powerpc/platforms/cell/spufs/inode.c | 1 + arch/s390/hypfs/inode.c | 1 + drivers/firmware/efivars.c | 1 + drivers/infiniband/hw/ipath/ipath_fs.c | 1 + drivers/infiniband/hw/qib/qib_fs.c | 1 + drivers/misc/ibmasm/ibmasmfs.c | 1 + drivers/mtd/mtdchar.c | 1 + drivers/oprofile/oprofilefs.c | 1 + drivers/staging/ccg/f_fs.c | 1 + drivers/usb/gadget/f_fs.c | 1 + drivers/usb/gadget/inode.c | 1 + drivers/xen/xenfs/super.c | 1 + fs/9p/vfs_super.c | 1 + fs/adfs/super.c | 1 + fs/affs/super.c | 1 + fs/afs/super.c | 1 + fs/autofs4/init.c | 1 + fs/befs/linuxvfs.c | 1 + fs/bfs/inode.c | 1 + fs/binfmt_misc.c | 1 + fs/btrfs/super.c | 1 + fs/ceph/super.c | 1 + fs/coda/inode.c | 1 + fs/configfs/mount.c | 1 + fs/cramfs/inode.c | 1 + fs/debugfs/inode.c | 1 + fs/devpts/inode.c | 1 + fs/ecryptfs/main.c | 1 + fs/efs/super.c | 1 + fs/exofs/super.c | 1 + fs/ext2/super.c | 1 + fs/ext3/super.c | 1 + fs/ext4/super.c | 5 +++-- fs/f2fs/super.c | 1 + fs/fat/namei_msdos.c | 1 + fs/fat/namei_vfat.c | 1 + fs/filesystems.c | 2 +- fs/freevxfs/vxfs_super.c | 2 +- fs/fuse/control.c | 1 + fs/fuse/inode.c | 2 ++ fs/gfs2/ops_fstype.c | 4 +++- fs/hfs/super.c | 1 + fs/hfsplus/super.c | 1 + fs/hppfs/hppfs.c | 1 + fs/hugetlbfs/inode.c | 1 + fs/isofs/inode.c | 3 +-- fs/jffs2/super.c | 1 + fs/jfs/super.c | 1 + fs/logfs/super.c | 1 + fs/minix/inode.c | 1 + fs/ncpfs/inode.c | 1 + fs/nfs/super.c | 3 ++- fs/nfsd/nfsctl.c | 1 + fs/nilfs2/super.c | 1 + fs/ntfs/super.c | 1 + fs/ocfs2/dlmfs/dlmfs.c | 1 + fs/omfs/inode.c | 1 + fs/openpromfs/inode.c | 1 + fs/qnx4/inode.c | 1 + fs/qnx6/inode.c | 1 + fs/reiserfs/super.c | 1 + fs/romfs/super.c | 1 + fs/sysv/super.c | 3 ++- fs/ubifs/super.c | 1 + fs/ufs/super.c | 1 + fs/xfs/xfs_super.c | 1 + include/linux/fs.h | 2 ++ net/sunrpc/rpc_pipe.c | 4 +--- 69 files changed, 77 insertions(+), 12 deletions(-) diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 433f5e8a2cd1..2eda28414abb 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -619,6 +619,7 @@ static struct file_system_type pfm_fs_type = { .mount = pfmfs_mount, .kill_sb = kill_anon_super, }; +MODULE_ALIAS_FS("pfmfs"); DEFINE_PER_CPU(unsigned long, pfm_syst_info); DEFINE_PER_CPU(struct task_struct *, pmu_owner); diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 863184b182f4..3f3bb4cdbbec 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -749,6 +749,7 @@ static struct file_system_type spufs_type = { .mount = spufs_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("spufs"); static int __init spufs_init(void) { diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 8538015ed4a0..5f7d7ba2874c 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -456,6 +456,7 @@ static struct file_system_type hypfs_type = { .mount = hypfs_mount, .kill_sb = hypfs_kill_super }; +MODULE_ALIAS_FS("s390_hypfs"); static const struct super_operations hypfs_s_ops = { .statfs = simple_statfs, diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 7320bf891706..3edade07b249 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -1234,6 +1234,7 @@ static struct file_system_type efivarfs_type = { .mount = efivarfs_mount, .kill_sb = efivarfs_kill_sb, }; +MODULE_ALIAS_FS("efivarfs"); /* * Handle negative dentry. 
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index a479375a8fd8..e0c404bdc4a8 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -410,6 +410,7 @@ static struct file_system_type ipathfs_fs_type = { .mount = ipathfs_mount, .kill_sb = ipathfs_kill_super, }; +MODULE_ALIAS_FS("ipathfs"); int __init ipath_init_ipathfs(void) { diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index 644bd6f6467c..f247fc6e6182 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -604,6 +604,7 @@ static struct file_system_type qibfs_fs_type = { .mount = qibfs_mount, .kill_sb = qibfs_kill_super, }; +MODULE_ALIAS_FS("ipathfs"); int __init qib_init_qibfs(void) { diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c index 6673e578b3e9..ce5b75616b45 100644 --- a/drivers/misc/ibmasm/ibmasmfs.c +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -110,6 +110,7 @@ static struct file_system_type ibmasmfs_type = { .mount = ibmasmfs_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("ibmasmfs"); static int ibmasmfs_fill_super (struct super_block *sb, void *data, int silent) { diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 82c06165d3d2..92ab30ab00dc 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -1238,6 +1238,7 @@ static struct file_system_type mtd_inodefs_type = { .mount = mtd_inodefs_mount, .kill_sb = kill_anon_super, }; +MODULE_ALIAS_FS("mtd_inodefs"); static int __init init_mtdchar(void) { diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c index 445ffda715ad..7c12d9c2b230 100644 --- a/drivers/oprofile/oprofilefs.c +++ b/drivers/oprofile/oprofilefs.c @@ -276,6 +276,7 @@ static struct file_system_type oprofilefs_type = { .mount = oprofilefs_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("oprofilefs"); int __init oprofilefs_register(void) diff --git a/drivers/staging/ccg/f_fs.c b/drivers/staging/ccg/f_fs.c index 8adc79d1b402..f6373dade7fb 100644 --- a/drivers/staging/ccg/f_fs.c +++ b/drivers/staging/ccg/f_fs.c @@ -1223,6 +1223,7 @@ static struct file_system_type ffs_fs_type = { .mount = ffs_fs_mount, .kill_sb = ffs_fs_kill_sb, }; +MODULE_ALIAS_FS("functionfs"); /* Driver's main init/cleanup functions *************************************/ diff --git a/drivers/usb/gadget/f_fs.c b/drivers/usb/gadget/f_fs.c index 38388d7844fc..c377ff84bf2c 100644 --- a/drivers/usb/gadget/f_fs.c +++ b/drivers/usb/gadget/f_fs.c @@ -1235,6 +1235,7 @@ static struct file_system_type ffs_fs_type = { .mount = ffs_fs_mount, .kill_sb = ffs_fs_kill_sb, }; +MODULE_ALIAS_FS("functionfs"); /* Driver's main init/cleanup functions *************************************/ diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index 8ac840f25ba9..e2b2e9cf254a 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -2105,6 +2105,7 @@ static struct file_system_type gadgetfs_type = { .mount = gadgetfs_mount, .kill_sb = gadgetfs_kill_sb, }; +MODULE_ALIAS_FS("gadgetfs"); /*----------------------------------------------------------------------*/ diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index ec0abb6df3c3..71679875f056 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c @@ -75,6 +75,7 @@ static struct file_system_type xenfs_type = { .mount = xenfs_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("xenfs"); static int __init 
xenfs_init(void) { diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 91dad63e5a2d..2756dcd5de6e 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -365,3 +365,4 @@ struct file_system_type v9fs_fs_type = { .owner = THIS_MODULE, .fs_flags = FS_RENAME_DOES_D_MOVE, }; +MODULE_ALIAS_FS("9p"); diff --git a/fs/adfs/super.c b/fs/adfs/super.c index d57122935793..0ff4bae2c2a2 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -524,6 +524,7 @@ static struct file_system_type adfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("adfs"); static int __init init_adfs_fs(void) { diff --git a/fs/affs/super.c b/fs/affs/super.c index b84dc7352502..45161a832bbc 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -622,6 +622,7 @@ static struct file_system_type affs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("affs"); static int __init init_affs_fs(void) { diff --git a/fs/afs/super.c b/fs/afs/super.c index 7c31ec399575..c4861557e385 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -45,6 +45,7 @@ struct file_system_type afs_fs_type = { .kill_sb = afs_kill_super, .fs_flags = 0, }; +MODULE_ALIAS_FS("afs"); static const struct super_operations afs_super_ops = { .statfs = afs_statfs, diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c index cddc74b9cdb2..b3db517e89ec 100644 --- a/fs/autofs4/init.c +++ b/fs/autofs4/init.c @@ -26,6 +26,7 @@ static struct file_system_type autofs_fs_type = { .mount = autofs_mount, .kill_sb = autofs4_kill_sb, }; +MODULE_ALIAS_FS("autofs"); static int __init init_autofs4_fs(void) { diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index c8f4e25eb9e2..8615ee89ab55 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -951,6 +951,7 @@ static struct file_system_type befs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("befs"); static int __init init_befs_fs(void) diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 737aaa3f7090..5e376bb93419 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -473,6 +473,7 @@ static struct file_system_type bfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("bfs"); static int __init init_bfs_fs(void) { diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index fecbbf3f8ff2..751df5e4f61a 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -720,6 +720,7 @@ static struct file_system_type bm_fs_type = { .mount = bm_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("binfmt_misc"); static int __init init_misc_binfmt(void) { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 68a29a1ea068..f6b88595f858 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1558,6 +1558,7 @@ static struct file_system_type btrfs_fs_type = { .kill_sb = btrfs_kill_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("btrfs"); /* * used by btrfsctl to scan devices when no FS is mounted diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 9fe17c6c2876..6ddc0bca56b2 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -952,6 +952,7 @@ static struct file_system_type ceph_fs_type = { .kill_sb = ceph_kill_sb, .fs_flags = FS_RENAME_DOES_D_MOVE, }; +MODULE_ALIAS_FS("ceph"); #define _STRINGIFY(x) #x #define STRINGIFY(x) _STRINGIFY(x) diff --git a/fs/coda/inode.c b/fs/coda/inode.c index dada9d0abede..4dcc0d81a7aa 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -329,4 +329,5 @@ struct file_system_type coda_fs_type = { .kill_sb = kill_anon_super, .fs_flags = FS_BINARY_MOUNTDATA, 
}; +MODULE_ALIAS_FS("coda"); diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index aee0a7ebbd8e..7f26c3cf75ae 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -114,6 +114,7 @@ static struct file_system_type configfs_fs_type = { .mount = configfs_do_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("configfs"); struct dentry *configfs_pin_fs(void) { diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 3ceb9ec976e1..35b1c7bd18b7 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -573,6 +573,7 @@ static struct file_system_type cramfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("cramfs"); static int __init init_cramfs_fs(void) { diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 0c4f80b447fb..4888cb3fdef7 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -299,6 +299,7 @@ static struct file_system_type debug_fs_type = { .mount = debug_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("debugfs"); static struct dentry *__create_file(const char *name, umode_t mode, struct dentry *parent, void *data, diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 073d30b9d1ac..79b662985efe 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -510,6 +510,7 @@ static struct file_system_type devpts_fs_type = { .fs_flags = FS_USERNS_MOUNT | FS_USERNS_DEV_MOUNT, #endif }; +MODULE_ALIAS_FS("devpts"); /* * The normal naming convention is simply /dev/pts/; this conforms diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 4e0886c9e5c4..e924cf45aad9 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -629,6 +629,7 @@ static struct file_system_type ecryptfs_fs_type = { .kill_sb = ecryptfs_kill_block_super, .fs_flags = 0 }; +MODULE_ALIAS_FS("ecryptfs"); /** * inode_info_init_once diff --git a/fs/efs/super.c b/fs/efs/super.c index 2002431ef9a0..c6f57a74a559 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -33,6 +33,7 @@ static struct file_system_type efs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("efs"); static struct pt_types sgi_pt_types[] = { {0x00, "SGI vh"}, diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 5e59280d42d7..9d9763328734 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -1010,6 +1010,7 @@ static struct file_system_type exofs_type = { .mount = exofs_mount, .kill_sb = generic_shutdown_super, }; +MODULE_ALIAS_FS("exofs"); static int __init init_exofs(void) { diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 7f68c8114026..288534920fe5 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1536,6 +1536,7 @@ static struct file_system_type ext2_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("ext2"); static int __init init_ext2_fs(void) { diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 5546ca225ffe..1d6e2ed85322 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -3068,6 +3068,7 @@ static struct file_system_type ext3_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("ext3"); static int __init init_ext3_fs(void) { diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5e6c87836193..34e855219231 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -90,6 +90,7 @@ static struct file_system_type ext2_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("ext2"); #define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type) #else #define IS_EXT2_SB(sb) (0) @@ -104,6 +105,7 @@ static struct 
file_system_type ext3_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("ext3"); #define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type) #else #define IS_EXT3_SB(sb) (0) @@ -5152,7 +5154,6 @@ static inline int ext2_feature_set_ok(struct super_block *sb) return 0; return 1; } -MODULE_ALIAS("ext2"); #else static inline void register_as_ext2(void) { } static inline void unregister_as_ext2(void) { } @@ -5185,7 +5186,6 @@ static inline int ext3_feature_set_ok(struct super_block *sb) return 0; return 1; } -MODULE_ALIAS("ext3"); #else static inline void register_as_ext3(void) { } static inline void unregister_as_ext3(void) { } @@ -5199,6 +5199,7 @@ static struct file_system_type ext4_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("ext4"); static int __init ext4_init_feat_adverts(void) { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8c117649a035..fea6e582a2ed 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -687,6 +687,7 @@ static struct file_system_type f2fs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("f2fs"); static int __init init_inodecache(void) { diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index e2cfda94a28d..081b759cff83 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -668,6 +668,7 @@ static struct file_system_type msdos_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("msdos"); static int __init init_msdos_fs(void) { diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index ac959d655e7d..2da952036a3d 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -1073,6 +1073,7 @@ static struct file_system_type vfat_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("vfat"); static int __init init_vfat_fs(void) { diff --git a/fs/filesystems.c b/fs/filesystems.c index da165f6adcbf..92567d95ba6a 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -273,7 +273,7 @@ struct file_system_type *get_fs_type(const char *name) int len = dot ? 
dot - name : strlen(name); fs = __get_fs_type(name, len); - if (!fs && (request_module("%.*s", len, name) == 0)) + if (!fs && (request_module("fs-%.*s", len, name) == 0)) fs = __get_fs_type(name, len); if (dot && fs && !(fs->fs_flags & FS_HAS_SUBTYPE)) { diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index fed2c8afb3a9..455074308069 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -52,7 +52,6 @@ MODULE_AUTHOR("Christoph Hellwig"); MODULE_DESCRIPTION("Veritas Filesystem (VxFS) driver"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_ALIAS("vxfs"); /* makes mount -t vxfs autoload the module */ static void vxfs_put_super(struct super_block *); @@ -258,6 +257,7 @@ static struct file_system_type vxfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("vxfs"); /* makes mount -t vxfs autoload the module */ static int __init vxfs_init(void) diff --git a/fs/fuse/control.c b/fs/fuse/control.c index b7978b9f75ef..a0b0855d00a9 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -341,6 +341,7 @@ static struct file_system_type fuse_ctl_fs_type = { .mount = fuse_ctl_mount, .kill_sb = fuse_ctl_kill_sb, }; +MODULE_ALIAS_FS("fusectl"); int __init fuse_ctl_init(void) { diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index df00993ed108..137185c3884f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1117,6 +1117,7 @@ static struct file_system_type fuse_fs_type = { .mount = fuse_mount, .kill_sb = fuse_kill_sb_anon, }; +MODULE_ALIAS_FS("fuse"); #ifdef CONFIG_BLOCK static struct dentry *fuse_mount_blk(struct file_system_type *fs_type, @@ -1146,6 +1147,7 @@ static struct file_system_type fuseblk_fs_type = { .kill_sb = fuse_kill_sb_blk, .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, }; +MODULE_ALIAS_FS("fuseblk"); static inline int register_fuseblk(void) { diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 1b612be4b873..60ede2a0f43f 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -1425,6 +1426,7 @@ struct file_system_type gfs2_fs_type = { .kill_sb = gfs2_kill_sb, .owner = THIS_MODULE, }; +MODULE_ALIAS_FS("gfs2"); struct file_system_type gfs2meta_fs_type = { .name = "gfs2meta", @@ -1432,4 +1434,4 @@ struct file_system_type gfs2meta_fs_type = { .mount = gfs2_mount_meta, .owner = THIS_MODULE, }; - +MODULE_ALIAS_FS("gfs2meta"); diff --git a/fs/hfs/super.c b/fs/hfs/super.c index e93ddaadfd1e..bbaaa8a4ee64 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -466,6 +466,7 @@ static struct file_system_type hfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("hfs"); static void hfs_init_once(void *p) { diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 974c26f96fae..7b87284e46dc 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -654,6 +654,7 @@ static struct file_system_type hfsplus_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("hfsplus"); static void hfsplus_init_once(void *p) { diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 74f55703be49..126d3c2e2dee 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -748,6 +748,7 @@ static struct file_system_type hppfs_type = { .kill_sb = kill_anon_super, .fs_flags = 0, }; +MODULE_ALIAS_FS("hppfs"); static int __init init_hppfs(void) { diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 7f94e0cbc69c..84e3d856e91d 100644 --- a/fs/hugetlbfs/inode.c +++ 
b/fs/hugetlbfs/inode.c @@ -896,6 +896,7 @@ static struct file_system_type hugetlbfs_fs_type = { .mount = hugetlbfs_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("hugetlbfs"); static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE]; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 67ce52507d7d..a67f16e846a2 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -1556,6 +1556,7 @@ static struct file_system_type iso9660_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("iso9660"); static int __init init_iso9660_fs(void) { @@ -1593,5 +1594,3 @@ static void __exit exit_iso9660_fs(void) module_init(init_iso9660_fs) module_exit(exit_iso9660_fs) MODULE_LICENSE("GPL"); -/* Actual filesystem name is iso9660, as requested in filesystems.c */ -MODULE_ALIAS("iso9660"); diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index d3d8799e2187..0defb1cc2a35 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -356,6 +356,7 @@ static struct file_system_type jffs2_fs_type = { .mount = jffs2_mount, .kill_sb = jffs2_kill_sb, }; +MODULE_ALIAS_FS("jffs2"); static int __init init_jffs2_fs(void) { diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 060ba638becb..2003e830ed1c 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -833,6 +833,7 @@ static struct file_system_type jfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("jfs"); static void init_once(void *foo) { diff --git a/fs/logfs/super.c b/fs/logfs/super.c index 345c24b8a6f8..54360293bcb5 100644 --- a/fs/logfs/super.c +++ b/fs/logfs/super.c @@ -608,6 +608,7 @@ static struct file_system_type logfs_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("logfs"); static int __init logfs_init(void) { diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 99541cceb584..df122496f328 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -660,6 +660,7 @@ static struct file_system_type minix_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("minix"); static int __init init_minix_fs(void) { diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 7dafd6899a62..26910c8154da 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -1051,6 +1051,7 @@ static struct file_system_type ncp_fs_type = { .kill_sb = kill_anon_super, .fs_flags = FS_BINARY_MOUNTDATA, }; +MODULE_ALIAS_FS("ncpfs"); static int __init init_ncp_fs(void) { diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 17b32b722457..95cdcb208dfb 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -294,6 +294,7 @@ struct file_system_type nfs_fs_type = { .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, }; +MODULE_ALIAS_FS("nfs"); EXPORT_SYMBOL_GPL(nfs_fs_type); struct file_system_type nfs_xdev_fs_type = { @@ -333,6 +334,7 @@ struct file_system_type nfs4_fs_type = { .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, }; +MODULE_ALIAS_FS("nfs4"); EXPORT_SYMBOL_GPL(nfs4_fs_type); static int __init register_nfs4_fs(void) @@ -2717,6 +2719,5 @@ module_param(send_implementation_id, ushort, 0644); MODULE_PARM_DESC(send_implementation_id, "Send implementation ID with NFSv4.1 exchange_id"); MODULE_PARM_DESC(nfs4_unique_id, "nfs_client_id4 uniquifier string"); -MODULE_ALIAS("nfs4"); #endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 13a21c8fca49..f33455b4d957 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1090,6 +1090,7 @@ static struct file_system_type nfsd_fs_type = { .mount = 
nfsd_mount, .kill_sb = nfsd_umount, }; +MODULE_ALIAS_FS("nfsd"); #ifdef CONFIG_PROC_FS static int create_proc_exports_entry(void) diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 3c991dc84f2f..c7d1f9f18b09 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1361,6 +1361,7 @@ struct file_system_type nilfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("nilfs2"); static void nilfs_inode_init_once(void *obj) { diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 4a8289f8b16c..82650d52d916 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -3079,6 +3079,7 @@ static struct file_system_type ntfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("ntfs"); /* Stable names for the slab caches. */ static const char ntfs_index_ctx_cache_name[] = "ntfs_index_ctx_cache"; diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 4c5fc8d77dc2..12bafb7265ce 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -640,6 +640,7 @@ static struct file_system_type dlmfs_fs_type = { .mount = dlmfs_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("ocfs2_dlmfs"); static int __init init_dlmfs_fs(void) { diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index 25d715c7c87a..d8b0afde2179 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -572,6 +572,7 @@ static struct file_system_type omfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("omfs"); static int __init init_omfs_fs(void) { diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index ae47fa7efb9d..75885ffde44e 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -432,6 +432,7 @@ static struct file_system_type openprom_fs_type = { .mount = openprom_mount, .kill_sb = kill_anon_super, }; +MODULE_ALIAS_FS("openpromfs"); static void op_inode_init_once(void *data) { diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 43098bb5723a..2e8caa62da78 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -412,6 +412,7 @@ static struct file_system_type qnx4_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("qnx4"); static int __init init_qnx4_fs(void) { diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index 57199a52a351..8d941edfefa1 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c @@ -672,6 +672,7 @@ static struct file_system_type qnx6_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("qnx6"); static int __init init_qnx6_fs(void) { diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 418bdc3a57da..194113b1b11b 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2434,6 +2434,7 @@ struct file_system_type reiserfs_fs_type = { .kill_sb = reiserfs_kill_sb, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("reiserfs"); MODULE_DESCRIPTION("ReiserFS journaled filesystem"); MODULE_AUTHOR("Hans Reiser "); diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 7e8d3a80bdab..15cbc41ee365 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -599,6 +599,7 @@ static struct file_system_type romfs_fs_type = { .kill_sb = romfs_kill_sb, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("romfs"); /* * inode storage initialiser diff --git a/fs/sysv/super.c b/fs/sysv/super.c index a38e87bdd78d..a39938b1feea 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -545,6 +545,7 @@ static struct file_system_type sysv_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("sysv"); 
static struct file_system_type v7_fs_type = { .owner = THIS_MODULE, @@ -553,6 +554,7 @@ static struct file_system_type v7_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("v7"); static int __init init_sysv_fs(void) { @@ -586,5 +588,4 @@ static void __exit exit_sysv_fs(void) module_init(init_sysv_fs) module_exit(exit_sysv_fs) -MODULE_ALIAS("v7"); MODULE_LICENSE("GPL"); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index ddc0f6ae65e9..ac838b844936 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2174,6 +2174,7 @@ static struct file_system_type ubifs_fs_type = { .mount = ubifs_mount, .kill_sb = kill_ubifs_super, }; +MODULE_ALIAS_FS("ubifs"); /* * Inode slab cache constructor. diff --git a/fs/ufs/super.c b/fs/ufs/super.c index dc8e3a861d0f..329f2f53b7ed 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1500,6 +1500,7 @@ static struct file_system_type ufs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("ufs"); static int __init init_ufs_fs(void) { diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index c407121873b4..ea341cea68cb 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1561,6 +1561,7 @@ static struct file_system_type xfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("xfs"); STATIC int __init xfs_init_zones(void) diff --git a/include/linux/fs.h b/include/linux/fs.h index 74a907b8b950..2c28271ab9d4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1825,6 +1825,8 @@ struct file_system_type { struct lock_class_key i_mutex_dir_key; }; +#define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) + extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_bdev(struct file_system_type *fs_type, diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 7b9b40224a27..a0f48a51e14e 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1174,6 +1174,7 @@ static struct file_system_type rpc_pipe_fs_type = { .mount = rpc_mount, .kill_sb = rpc_kill_sb, }; +MODULE_ALIAS_FS("rpc_pipefs"); static void init_once(void *foo) @@ -1218,6 +1219,3 @@ void unregister_rpc_pipefs(void) kmem_cache_destroy(rpc_inode_cachep); unregister_filesystem(&rpc_pipe_fs_type); } - -/* Make 'mount -t rpc_pipefs ...' autoload this module. */ -MODULE_ALIAS("rpc_pipefs"); From e3333e572f29109740b48302035a59f9db0fa8e9 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 20 Feb 2013 20:58:42 -0800 Subject: [PATCH 120/261] hwmon: Update my e-mail address in driver documentation Most of the hwmon driver documentation still listed my old invalid e-mail address. Fix it. 
Signed-off-by: Guenter Roeck Acked-by: Jean Delvare --- Documentation/hwmon/adm1275 | 2 +- Documentation/hwmon/jc42 | 2 +- Documentation/hwmon/lineage-pem | 2 +- Documentation/hwmon/lm25066 | 2 +- Documentation/hwmon/ltc2978 | 2 +- Documentation/hwmon/ltc4261 | 2 +- Documentation/hwmon/max16064 | 2 +- Documentation/hwmon/max16065 | 2 +- Documentation/hwmon/max34440 | 2 +- Documentation/hwmon/max8688 | 2 +- Documentation/hwmon/pmbus | 2 +- Documentation/hwmon/smm665 | 2 +- Documentation/hwmon/ucd9000 | 2 +- Documentation/hwmon/ucd9200 | 2 +- Documentation/hwmon/zl6100 | 2 +- 15 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Documentation/hwmon/adm1275 b/Documentation/hwmon/adm1275 index 2cfa25667123..15b4a20d5062 100644 --- a/Documentation/hwmon/adm1275 +++ b/Documentation/hwmon/adm1275 @@ -15,7 +15,7 @@ Supported chips: Addresses scanned: - Datasheet: www.analog.com/static/imported-files/data_sheets/ADM1276.pdf -Author: Guenter Roeck +Author: Guenter Roeck Description diff --git a/Documentation/hwmon/jc42 b/Documentation/hwmon/jc42 index 165077121238..868d74d6b773 100644 --- a/Documentation/hwmon/jc42 +++ b/Documentation/hwmon/jc42 @@ -49,7 +49,7 @@ Supported chips: Addresses scanned: I2C 0x18 - 0x1f Author: - Guenter Roeck + Guenter Roeck Description diff --git a/Documentation/hwmon/lineage-pem b/Documentation/hwmon/lineage-pem index 2ba5ed126858..83b2ddc160c8 100644 --- a/Documentation/hwmon/lineage-pem +++ b/Documentation/hwmon/lineage-pem @@ -8,7 +8,7 @@ Supported devices: Documentation: http://www.lineagepower.com/oem/pdf/CPLI2C.pdf -Author: Guenter Roeck +Author: Guenter Roeck Description diff --git a/Documentation/hwmon/lm25066 b/Documentation/hwmon/lm25066 index a21db81c4591..26025e419d35 100644 --- a/Documentation/hwmon/lm25066 +++ b/Documentation/hwmon/lm25066 @@ -19,7 +19,7 @@ Supported chips: Datasheet: http://www.national.com/pf/LM/LM5066.html -Author: Guenter Roeck +Author: Guenter Roeck Description diff --git a/Documentation/hwmon/ltc2978 b/Documentation/hwmon/ltc2978 index c365f9beb5dd..1642348de2d6 100644 --- a/Documentation/hwmon/ltc2978 +++ b/Documentation/hwmon/ltc2978 @@ -11,7 +11,7 @@ Supported chips: Addresses scanned: - Datasheet: http://cds.linear.com/docs/Datasheet/3880f.pdf -Author: Guenter Roeck +Author: Guenter Roeck Description diff --git a/Documentation/hwmon/ltc4261 b/Documentation/hwmon/ltc4261 index eba2e2c4b94d..9378a75c6134 100644 --- a/Documentation/hwmon/ltc4261 +++ b/Documentation/hwmon/ltc4261 @@ -8,7 +8,7 @@ Supported chips: Datasheet: http://cds.linear.com/docs/Datasheet/42612fb.pdf -Author: Guenter Roeck +Author: Guenter Roeck Description diff --git a/Documentation/hwmon/max16064 b/Documentation/hwmon/max16064 index f8b478076f6d..d59cc7829bec 100644 --- a/Documentation/hwmon/max16064 +++ b/Documentation/hwmon/max16064 @@ -7,7 +7,7 @@ Supported chips: Addresses scanned: - Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX16064.pdf -Author: Guenter Roeck +Author: Guenter Roeck Description diff --git a/Documentation/hwmon/max16065 b/Documentation/hwmon/max16065 index c11f64a1f2ad..208a29e43010 100644 --- a/Documentation/hwmon/max16065 +++ b/Documentation/hwmon/max16065 @@ -24,7 +24,7 @@ Supported chips: http://datasheets.maxim-ic.com/en/ds/MAX16070-MAX16071.pdf -Author: Guenter Roeck +Author: Guenter Roeck Description diff --git a/Documentation/hwmon/max34440 b/Documentation/hwmon/max34440 index 47651ff341ae..37cbf472a19d 100644 --- a/Documentation/hwmon/max34440 +++ b/Documentation/hwmon/max34440 @@ -27,7 +27,7 @@ Supported chips: 
Addresses scanned: - Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX34461.pdf -Author: Guenter Roeck +Author: Guenter Roeck Description diff --git a/Documentation/hwmon/max8688 b/Documentation/hwmon/max8688 index fe849871df32..e78078638b91 100644 --- a/Documentation/hwmon/max8688 +++ b/Documentation/hwmon/max8688 @@ -7,7 +7,7 @@ Supported chips: Addresses scanned: - Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX8688.pdf -Author: Guenter Roeck +Author: Guenter Roeck Description diff --git a/Documentation/hwmon/pmbus b/Documentation/hwmon/pmbus index 3d3a0f97f966..cf756ed48ff9 100644 --- a/Documentation/hwmon/pmbus +++ b/Documentation/hwmon/pmbus @@ -34,7 +34,7 @@ Supported chips: Addresses scanned: - Datasheet: n.a. -Author: Guenter Roeck +Author: Guenter Roeck Description diff --git a/Documentation/hwmon/smm665 b/Documentation/hwmon/smm665 index 59e316140542..a341eeedab75 100644 --- a/Documentation/hwmon/smm665 +++ b/Documentation/hwmon/smm665 @@ -29,7 +29,7 @@ Supported chips: http://www.summitmicro.com/prod_select/summary/SMM766/SMM766_2086.pdf http://www.summitmicro.com/prod_select/summary/SMM766B/SMM766B_2122.pdf -Author: Guenter Roeck +Author: Guenter Roeck Module Parameters diff --git a/Documentation/hwmon/ucd9000 b/Documentation/hwmon/ucd9000 index 0df5f276505b..805e33edb978 100644 --- a/Documentation/hwmon/ucd9000 +++ b/Documentation/hwmon/ucd9000 @@ -11,7 +11,7 @@ Supported chips: http://focus.ti.com/lit/ds/symlink/ucd9090.pdf http://focus.ti.com/lit/ds/symlink/ucd90910.pdf -Author: Guenter Roeck +Author: Guenter Roeck Description diff --git a/Documentation/hwmon/ucd9200 b/Documentation/hwmon/ucd9200 index fd7d07b1908a..1e8060e631bd 100644 --- a/Documentation/hwmon/ucd9200 +++ b/Documentation/hwmon/ucd9200 @@ -15,7 +15,7 @@ Supported chips: http://focus.ti.com/lit/ds/symlink/ucd9246.pdf http://focus.ti.com/lit/ds/symlink/ucd9248.pdf -Author: Guenter Roeck +Author: Guenter Roeck Description diff --git a/Documentation/hwmon/zl6100 b/Documentation/hwmon/zl6100 index 3d924b6b59e9..756b57c6b73e 100644 --- a/Documentation/hwmon/zl6100 +++ b/Documentation/hwmon/zl6100 @@ -54,7 +54,7 @@ http://archive.ericsson.net/service/internet/picov/get?DocNo=28701-EN/LZT146401 http://archive.ericsson.net/service/internet/picov/get?DocNo=28701-EN/LZT146256 -Author: Guenter Roeck +Author: Guenter Roeck Description From 6d21a416562c2532f45cbcd40f47653f61d45533 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 27 Jan 2013 09:36:36 -0800 Subject: [PATCH 121/261] hwmon: (pmbus/ltc2978) Update datasheet links Links to datasheets are no longer valid. Provide links to product information instead (which provides links to the datasheets and is hopefully more persistent). 
Signed-off-by: Guenter Roeck Acked-by: Jean Delvare --- Documentation/hwmon/ltc2978 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/hwmon/ltc2978 b/Documentation/hwmon/ltc2978 index 1642348de2d6..e4d75c606c97 100644 --- a/Documentation/hwmon/ltc2978 +++ b/Documentation/hwmon/ltc2978 @@ -5,11 +5,11 @@ Supported chips: * Linear Technology LTC2978 Prefix: 'ltc2978' Addresses scanned: - - Datasheet: http://cds.linear.com/docs/Datasheet/2978fa.pdf + Datasheet: http://www.linear.com/product/ltc2978 * Linear Technology LTC3880 Prefix: 'ltc3880' Addresses scanned: - - Datasheet: http://cds.linear.com/docs/Datasheet/3880f.pdf + Datasheet: http://www.linear.com/product/ltc3880 Author: Guenter Roeck From dbd712c2272764a536e29ad6841dba74989a39d9 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 21 Feb 2013 09:33:25 -0800 Subject: [PATCH 122/261] hwmon: (pmbus/ltc2978) Fix peak attribute handling Peak attributes were not initialized and cleared correctly. Also, temp2_max is only supported on page 0 and thus does not need to be an array. Signed-off-by: Guenter Roeck Cc: stable@vger.kernel.org # 3.2+ Acked-by: Jean Delvare --- drivers/hwmon/pmbus/ltc2978.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/hwmon/pmbus/ltc2978.c b/drivers/hwmon/pmbus/ltc2978.c index 9652a2c92a24..cc29a7e57bd7 100644 --- a/drivers/hwmon/pmbus/ltc2978.c +++ b/drivers/hwmon/pmbus/ltc2978.c @@ -62,7 +62,7 @@ struct ltc2978_data { int temp_min, temp_max; int vout_min[8], vout_max[8]; int iout_max[2]; - int temp2_max[2]; + int temp2_max; struct pmbus_driver_info info; }; @@ -204,10 +204,9 @@ static int ltc3880_read_word_data(struct i2c_client *client, int page, int reg) ret = pmbus_read_word_data(client, page, LTC3880_MFR_TEMPERATURE2_PEAK); if (ret >= 0) { - if (lin11_to_val(ret) - > lin11_to_val(data->temp2_max[page])) - data->temp2_max[page] = ret; - ret = data->temp2_max[page]; + if (lin11_to_val(ret) > lin11_to_val(data->temp2_max)) + data->temp2_max = ret; + ret = data->temp2_max; } break; case PMBUS_VIRT_READ_VIN_MIN: @@ -248,11 +247,11 @@ static int ltc2978_write_word_data(struct i2c_client *client, int page, switch (reg) { case PMBUS_VIRT_RESET_IOUT_HISTORY: - data->iout_max[page] = 0x7fff; + data->iout_max[page] = 0x7c00; ret = ltc2978_clear_peaks(client, page, data->id); break; case PMBUS_VIRT_RESET_TEMP2_HISTORY: - data->temp2_max[page] = 0x7fff; + data->temp2_max = 0x7c00; ret = ltc2978_clear_peaks(client, page, data->id); break; case PMBUS_VIRT_RESET_VOUT_HISTORY: @@ -262,12 +261,12 @@ static int ltc2978_write_word_data(struct i2c_client *client, int page, break; case PMBUS_VIRT_RESET_VIN_HISTORY: data->vin_min = 0x7bff; - data->vin_max = 0; + data->vin_max = 0x7c00; ret = ltc2978_clear_peaks(client, page, data->id); break; case PMBUS_VIRT_RESET_TEMP_HISTORY: data->temp_min = 0x7bff; - data->temp_max = 0x7fff; + data->temp_max = 0x7c00; ret = ltc2978_clear_peaks(client, page, data->id); break; default: @@ -321,10 +320,11 @@ static int ltc2978_probe(struct i2c_client *client, info = &data->info; info->write_word_data = ltc2978_write_word_data; - data->vout_min[0] = 0xffff; data->vin_min = 0x7bff; + data->vin_max = 0x7c00; data->temp_min = 0x7bff; - data->temp_max = 0x7fff; + data->temp_max = 0x7c00; + data->temp2_max = 0x7c00; switch (id->driver_data) { case ltc2978: @@ -336,7 +336,6 @@ static int ltc2978_probe(struct i2c_client *client, for (i = 1; i < 8; i++) { info->func[i] = PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT; 
- data->vout_min[i] = 0xffff; } break; case ltc3880: @@ -352,11 +351,14 @@ static int ltc2978_probe(struct i2c_client *client, | PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT | PMBUS_HAVE_POUT | PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP; - data->vout_min[1] = 0xffff; + data->iout_max[0] = 0x7c00; + data->iout_max[1] = 0x7c00; break; default: return -ENODEV; } + for (i = 0; i < info->pages; i++) + data->vout_min[i] = 0xffff; return pmbus_do_probe(client, id, info); } From f366fccd0809f13ba20d64cae3c83f7338c88af7 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 21 Feb 2013 10:49:40 -0800 Subject: [PATCH 123/261] hwmon: (pmbus/ltc2978) Use detected chip ID to select supported functionality We read the chip ID from the chip, use it to determine if the chip ID provided to the driver is correct, and report it if wrong. We should also use the correct chip ID to select supported functionality. Signed-off-by: Guenter Roeck Cc: stable@vger.kernel.org # 3.2+ Acked-by: Jean Delvare --- drivers/hwmon/pmbus/ltc2978.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/pmbus/ltc2978.c b/drivers/hwmon/pmbus/ltc2978.c index cc29a7e57bd7..a58de38e23d8 100644 --- a/drivers/hwmon/pmbus/ltc2978.c +++ b/drivers/hwmon/pmbus/ltc2978.c @@ -326,7 +326,7 @@ static int ltc2978_probe(struct i2c_client *client, data->temp_max = 0x7c00; data->temp2_max = 0x7c00; - switch (id->driver_data) { + switch (data->id) { case ltc2978: info->read_word_data = ltc2978_read_word_data; info->pages = 8; From ab302bb0b87fe01f3110c1c5d5a6fca439d27c6b Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 28 Feb 2013 13:05:13 +0100 Subject: [PATCH 124/261] hwmon: (adt7410) Document ADT7420 support The adt7410 driver supports the ADT7420, but its documentation file makes no mention of that. Add this refrence, and a brief a description of the differences between the ADT7410 and the ADT7420. Signed-off-by: Jean Delvare Cc: Lars-Peter Clausen Cc: Hartmut Knaack Cc: Guenter Roeck Signed-off-by: Guenter Roeck --- Documentation/hwmon/adt7410 | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Documentation/hwmon/adt7410 b/Documentation/hwmon/adt7410 index 96004000dc2a..58150c480e56 100644 --- a/Documentation/hwmon/adt7410 +++ b/Documentation/hwmon/adt7410 @@ -4,9 +4,14 @@ Kernel driver adt7410 Supported chips: * Analog Devices ADT7410 Prefix: 'adt7410' - Addresses scanned: I2C 0x48 - 0x4B + Addresses scanned: None Datasheet: Publicly available at the Analog Devices website http://www.analog.com/static/imported-files/data_sheets/ADT7410.pdf + * Analog Devices ADT7420 + Prefix: 'adt7420' + Addresses scanned: None + Datasheet: Publicly available at the Analog Devices website + http://www.analog.com/static/imported-files/data_sheets/ADT7420.pdf Author: Hartmut Knaack @@ -27,6 +32,10 @@ value per second or even justget one sample on demand for power saving. Besides, it can completely power down its ADC, if power management is required. +The ADT7420 is register compatible, the only differences being the package, +a slightly narrower operating temperature range (-40°C to +150°C), and a +better accuracy (0.25°C instead of 0.50°C.) + Configuration Notes ------------------- From 3e78080f81481aa8340374d5a37ae033c1cf4272 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 2 Mar 2013 15:33:30 +0800 Subject: [PATCH 125/261] hwmon: (sht15) Check return value of regulator_enable() Not having power is a pretty serious error so check that we are able to enable the supply and error out if we can't. 
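For readers who want the idiom in isolation: regulator_enable() returns 0 on success and a negative errno on failure, and the fix simply propagates that error out of probe. A minimal sketch of the pattern, essentially what the hunk below adds (field and device names follow the driver):

	ret = regulator_enable(data->reg);
	if (ret != 0) {
		dev_err(&pdev->dev, "failed to enable regulator: %d\n", ret);
		return ret;	/* fail probe rather than run unpowered */
	}

Failing probe here is the sensible choice, since a sensor whose supply could not be enabled cannot produce meaningful readings anyway.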
Signed-off-by: Mark Brown Cc: stable@vger.kernel.org #3.8+; 3.0+ will need manual backport Signed-off-by: Guenter Roeck --- drivers/hwmon/sht15.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/sht15.c b/drivers/hwmon/sht15.c index bfe326e896df..2507f902fb7a 100644 --- a/drivers/hwmon/sht15.c +++ b/drivers/hwmon/sht15.c @@ -965,7 +965,13 @@ static int sht15_probe(struct platform_device *pdev) if (voltage) data->supply_uv = voltage; - regulator_enable(data->reg); + ret = regulator_enable(data->reg); + if (ret != 0) { + dev_err(&pdev->dev, + "failed to enable regulator: %d\n", ret); + return ret; + } + /* * Setup a notifier block to update this if another device * causes the voltage to change From 290502bee239062499297916bb7d21d205e99d62 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 28 Feb 2013 00:39:37 -0800 Subject: [PATCH 126/261] eCryptfs: allow userspace messaging to be disabled When the userspace messaging (for the less common case of userspace key wrap/unwrap via ecryptfsd) is not needed, allow eCryptfs to build with it removed. This saves on kernel code size and reduces potential attack surface by removing the /dev/ecryptfs node. Signed-off-by: Kees Cook Signed-off-by: Tyler Hicks --- fs/ecryptfs/Kconfig | 8 +++++++ fs/ecryptfs/Makefile | 7 ++++-- fs/ecryptfs/ecryptfs_kernel.h | 40 +++++++++++++++++++++++++++++++++-- fs/ecryptfs/keystore.c | 4 ++-- include/linux/ecryptfs.h | 12 ++--------- 5 files changed, 55 insertions(+), 16 deletions(-) diff --git a/fs/ecryptfs/Kconfig b/fs/ecryptfs/Kconfig index cc16562654de..1f63120b669a 100644 --- a/fs/ecryptfs/Kconfig +++ b/fs/ecryptfs/Kconfig @@ -12,3 +12,11 @@ config ECRYPT_FS To compile this file system support as a module, choose M here: the module will be called ecryptfs. + +config ECRYPT_FS_MESSAGING + bool "Enable notifications for userspace key wrap/unwrap" + depends on ECRYPT_FS + help + Enables the /dev/ecryptfs entry for use by ecryptfsd. This allows + for userspace to wrap/unwrap file encryption keys by other + backends, like OpenSSL. 
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile index 2cc9ee4ad2eb..49678a69947d 100644 --- a/fs/ecryptfs/Makefile +++ b/fs/ecryptfs/Makefile @@ -1,7 +1,10 @@ # -# Makefile for the Linux 2.6 eCryptfs +# Makefile for the Linux eCryptfs # obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o -ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o miscdev.o kthread.o debug.o +ecryptfs-y := dentry.o file.o inode.o main.o super.o mmap.o read_write.o \ + crypto.o keystore.o kthread.o debug.o + +ecryptfs-$(CONFIG_ECRYPT_FS_MESSAGING) += messaging.o miscdev.o diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index cfb4b9fed520..a9df69efadc2 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -172,6 +172,19 @@ ecryptfs_get_key_payload_data(struct key *key) #define ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE 24 #define ECRYPTFS_ENCRYPTED_DENTRY_NAME_LEN (18 + 1 + 4 + 1 + 32) +#ifdef CONFIG_ECRYPT_FS_MESSAGING +# define ECRYPTFS_VERSIONING_MASK_MESSAGING (ECRYPTFS_VERSIONING_DEVMISC \ + | ECRYPTFS_VERSIONING_PUBKEY) +#else +# define ECRYPTFS_VERSIONING_MASK_MESSAGING 0 +#endif + +#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \ + | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \ + | ECRYPTFS_VERSIONING_XATTR \ + | ECRYPTFS_VERSIONING_MULTKEY \ + | ECRYPTFS_VERSIONING_MASK_MESSAGING \ + | ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION) struct ecryptfs_key_sig { struct list_head crypt_stat_list; char keysig[ECRYPTFS_SIG_SIZE_HEX + 1]; @@ -399,7 +412,9 @@ struct ecryptfs_daemon { struct hlist_node euid_chain; }; +#ifdef CONFIG_ECRYPT_FS_MESSAGING extern struct mutex ecryptfs_daemon_hash_mux; +#endif static inline size_t ecryptfs_lower_header_size(struct ecryptfs_crypt_stat *crypt_stat) @@ -604,6 +619,7 @@ int ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode); +#ifdef CONFIG_ECRYPT_FS_MESSAGING int ecryptfs_process_response(struct ecryptfs_daemon *daemon, struct ecryptfs_message *msg, u32 seq); int ecryptfs_send_message(char *data, int data_len, @@ -612,6 +628,24 @@ int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, struct ecryptfs_message **emsg); int ecryptfs_init_messaging(void); void ecryptfs_release_messaging(void); +#else +static inline int ecryptfs_init_messaging(void) +{ + return 0; +} +static inline void ecryptfs_release_messaging(void) +{ } +static inline int ecryptfs_send_message(char *data, int data_len, + struct ecryptfs_msg_ctx **msg_ctx) +{ + return -ENOTCONN; +} +static inline int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, + struct ecryptfs_message **emsg) +{ + return -ENOMSG; +} +#endif void ecryptfs_write_header_metadata(char *virt, @@ -649,12 +683,11 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, size_t offset_in_page, size_t size, struct inode *ecryptfs_inode); struct page *ecryptfs_get_locked_page(struct inode *inode, loff_t index); -int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon); -int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon); int ecryptfs_parse_packet_length(unsigned char *data, size_t *size, size_t *length_size); int ecryptfs_write_packet_length(char *dest, size_t size, size_t *packet_size_length); +#ifdef CONFIG_ECRYPT_FS_MESSAGING int ecryptfs_init_ecryptfs_miscdev(void); void ecryptfs_destroy_ecryptfs_miscdev(void); int ecryptfs_send_miscdev(char *data, 
size_t data_size, @@ -663,6 +696,9 @@ int ecryptfs_send_miscdev(char *data, size_t data_size, void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx); int ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, struct file *file); +int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon); +int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon); +#endif int ecryptfs_init_kthread(void); void ecryptfs_destroy_kthread(void); int ecryptfs_privileged_open(struct file **lower_file, diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 5aceff202dc0..7d52806c2119 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1168,7 +1168,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, rc = ecryptfs_send_message(payload, payload_len, &msg_ctx); if (rc) { ecryptfs_printk(KERN_ERR, "Error sending message to " - "ecryptfsd\n"); + "ecryptfsd: %d\n", rc); goto out; } rc = ecryptfs_wait_for_response(msg_ctx, &msg); @@ -1988,7 +1988,7 @@ pki_encrypt_session_key(struct key *auth_tok_key, rc = ecryptfs_send_message(payload, payload_len, &msg_ctx); if (rc) { ecryptfs_printk(KERN_ERR, "Error sending message to " - "ecryptfsd\n"); + "ecryptfsd: %d\n", rc); goto out; } rc = ecryptfs_wait_for_response(msg_ctx, &msg); diff --git a/include/linux/ecryptfs.h b/include/linux/ecryptfs.h index 2224a8c0cb64..8d5ab998a222 100644 --- a/include/linux/ecryptfs.h +++ b/include/linux/ecryptfs.h @@ -6,9 +6,8 @@ #define ECRYPTFS_VERSION_MINOR 0x04 #define ECRYPTFS_SUPPORTED_FILE_VERSION 0x03 /* These flags indicate which features are supported by the kernel - * module; userspace tools such as the mount helper read - * ECRYPTFS_VERSIONING_MASK from a sysfs handle in order to determine - * how to behave. */ + * module; userspace tools such as the mount helper read the feature + * bits from a sysfs handle in order to determine how to behave. */ #define ECRYPTFS_VERSIONING_PASSPHRASE 0x00000001 #define ECRYPTFS_VERSIONING_PUBKEY 0x00000002 #define ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH 0x00000004 @@ -19,13 +18,6 @@ #define ECRYPTFS_VERSIONING_HMAC 0x00000080 #define ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION 0x00000100 #define ECRYPTFS_VERSIONING_GCM 0x00000200 -#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \ - | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \ - | ECRYPTFS_VERSIONING_PUBKEY \ - | ECRYPTFS_VERSIONING_XATTR \ - | ECRYPTFS_VERSIONING_MULTKEY \ - | ECRYPTFS_VERSIONING_DEVMISC \ - | ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION) #define ECRYPTFS_MAX_PASSWORD_LENGTH 64 #define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH #define ECRYPTFS_SALT_SIZE 8 From 0010aeed7bfdc8d1048e524d285a69c11e0ac336 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 26 Feb 2013 11:34:34 +0000 Subject: [PATCH 127/261] metag: remove SET_PERSONALITY() Commit e72837e3e7bae3f182c4ac63c9424e86f1158dd0 ("default SET_PERSONALITY() in linux/elf.h"). The above commit moved the common definition of SET_PERSONALITY() in a bunch of the arch headers to linux/elf.h. Metag shares that common definition so remove it from arch/metag/include/asm/elf.h too. 
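For context, the generic fallback that include/linux/elf.h gained in that commit looks roughly like this (reproduced from memory, so treat the exact form as approximate):

	/* include/linux/elf.h: default used when an arch does not define its own */
	#ifndef SET_PERSONALITY
	#define SET_PERSONALITY(ex) \
		set_personality(PER_LINUX | (current->personality & (~PER_MASK)))
	#endif

Because the metag copy removed below is the same expansion, dropping it leaves the effective definition unchanged.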
Signed-off-by: James Hogan --- arch/metag/include/asm/elf.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/metag/include/asm/elf.h b/arch/metag/include/asm/elf.h index d63b9d0e57dd..d2baf6961794 100644 --- a/arch/metag/include/asm/elf.h +++ b/arch/metag/include/asm/elf.h @@ -100,9 +100,6 @@ typedef unsigned long elf_fpregset_t; #define ELF_PLATFORM (NULL) -#define SET_PERSONALITY(ex) \ - set_personality(PER_LINUX | (current->personality & (~PER_MASK))) - #define STACK_RND_MASK (0) #ifdef CONFIG_METAG_USER_TCM From 40f09f3cd69a356b4245794e46439674336413fb Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 4 Mar 2013 10:12:35 +0900 Subject: [PATCH 128/261] metag: Inhibit NUMA balancing. The metag NUMA implementation follows the SH model, using different nodes for memories with different latencies. As such, we ensure that automated balancing between nodes is inhibited, by way of the new ARCH_WANT_VARIABLE_LOCALITY. Signed-off-by: Paul Mundt Signed-off-by: James Hogan --- arch/metag/mm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/metag/mm/Kconfig b/arch/metag/mm/Kconfig index cd7f2f2ad416..975f2f4e3ecf 100644 --- a/arch/metag/mm/Kconfig +++ b/arch/metag/mm/Kconfig @@ -40,6 +40,7 @@ endchoice config NUMA bool "Non Uniform Memory Access (NUMA) Support" + select ARCH_WANT_NUMA_VARIABLE_LOCALITY help Some Meta systems have MMU-mappable on-chip memories with lower latencies than main memory. This enables support for From 091930a2e612a02debe8694b41f96e33fe45bba2 Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Thu, 28 Feb 2013 18:29:19 +0000 Subject: [PATCH 129/261] mailbox, pl320-ipc: remove __init from probe function Avoids a section mismatch. Signed-off-by: Mark Langsdorf Signed-off-by: Rafael J. Wysocki --- drivers/mailbox/pl320-ipc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/mailbox/pl320-ipc.c b/drivers/mailbox/pl320-ipc.c index c45b3aedafba..d873cbae2fbb 100644 --- a/drivers/mailbox/pl320-ipc.c +++ b/drivers/mailbox/pl320-ipc.c @@ -138,8 +138,7 @@ int pl320_ipc_unregister_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(pl320_ipc_unregister_notifier); -static int __init pl320_probe(struct amba_device *adev, - const struct amba_id *id) +static int pl320_probe(struct amba_device *adev, const struct amba_id *id) { int ret; From e5dde92cb2befb108ae8cfe8db68a954c164d77c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 28 Feb 2013 05:38:00 +0000 Subject: [PATCH 130/261] cpufreq: Fix a typo in comment Fix a typo in a comment in cpufreq_governor.h. [rjw: Changelog] Signed-off-by: Namhyung Kim Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index d2ac91150600..46bde01eee62 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -64,7 +64,7 @@ static void *get_cpu_dbs_info_s(int cpu) \ * dbs: used as a shortform for demand based switching It helps to keep variable * names smaller, simpler * cdbs: common dbs - * on_*: On-demand governor + * od_*: On-demand governor * cs_*: Conservative governor */ From 4b87581036849723242cadaa161e9b01234ef9ae Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Tue, 26 Feb 2013 23:53:02 +0000 Subject: [PATCH 131/261] PM / OPP: improve introductory documentation Make Operating Performance Points (OPP) library introductory chapter a little more reader-friendly. 
Split the chapter into two sections, highlight the definition with an example and minor rewording to be verbose. Reported-by: Linus Torvalds Signed-off-by: Nishanth Menon Reviewed-by: Randy Dunlap Signed-off-by: Rafael J. Wysocki --- Documentation/power/opp.txt | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/Documentation/power/opp.txt b/Documentation/power/opp.txt index 3035d00757ad..425c51d56aef 100644 --- a/Documentation/power/opp.txt +++ b/Documentation/power/opp.txt @@ -1,6 +1,5 @@ -*=============* -* OPP Library * -*=============* +Operating Performance Points (OPP) Library +========================================== (C) 2009-2010 Nishanth Menon , Texas Instruments Incorporated @@ -16,15 +15,31 @@ Contents 1. Introduction =============== +1.1 What is an Operating Performance Point (OPP)? + Complex SoCs of today consists of a multiple sub-modules working in conjunction. In an operational system executing varied use cases, not all modules in the SoC need to function at their highest performing frequency all the time. To facilitate this, sub-modules in a SoC are grouped into domains, allowing some -domains to run at lower voltage and frequency while other domains are loaded -more. The set of discrete tuples consisting of frequency and voltage pairs that +domains to run at lower voltage and frequency while other domains run at +voltage/frequency pairs that are higher. + +The set of discrete tuples consisting of frequency and voltage pairs that the device will support per domain are called Operating Performance Points or OPPs. +As an example: +Let us consider an MPU device which supports the following: +{300MHz at minimum voltage of 1V}, {800MHz at minimum voltage of 1.2V}, +{1GHz at minimum voltage of 1.3V} + +We can represent these as three OPPs as the following {Hz, uV} tuples: +{300000000, 1000000} +{800000000, 1200000} +{1000000000, 1300000} + +1.2 Operating Performance Points Library + OPP library provides a set of helper functions to organize and query the OPP information. The library is located in drivers/base/power/opp.c and the header is located in include/linux/opp.h. OPP library can be enabled by enabling From f5f43dcfff3a3c7f7de4a0cfca0106a0ccd58bd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20L=C3=B3pez?= Date: Mon, 25 Feb 2013 11:07:45 +0000 Subject: [PATCH 132/261] cpufreq: highbank: do not initialize array with a loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As uninitialized array members will be initialized to zero, we can avoid using a for loop by setting a value to it. Signed-off-by: Emilio López Acked-by: Viresh Kumar Acked-By: Mark Langsdorf Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/highbank-cpufreq.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/cpufreq/highbank-cpufreq.c b/drivers/cpufreq/highbank-cpufreq.c index 66e3a71b81a3..b61b5a3fad64 100644 --- a/drivers/cpufreq/highbank-cpufreq.c +++ b/drivers/cpufreq/highbank-cpufreq.c @@ -28,13 +28,7 @@ static int hb_voltage_change(unsigned int freq) { - int i; - u32 msg[HB_CPUFREQ_IPC_LEN]; - - msg[0] = HB_CPUFREQ_CHANGE_NOTE; - msg[1] = freq / 1000000; - for (i = 2; i < HB_CPUFREQ_IPC_LEN; i++) - msg[i] = 0; + u32 msg[HB_CPUFREQ_IPC_LEN] = {HB_CPUFREQ_CHANGE_NOTE, freq / 1000000}; return pl320_ipc_transmit(msg); } From b81ea1b5ac4d3c6a628158b736dd4a98c46c29d9 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Sun, 3 Mar 2013 22:48:14 +0100 Subject: [PATCH 133/261] PM / QoS: Fix concurrency issues and memory leaks in device PM QoS The current device PM QoS code assumes that certain functions will never be called in parallel with each other (for example, it is assumed that dev_pm_qos_expose_flags() won't be called in parallel with dev_pm_qos_hide_flags() for the same device and analogously for the latency limit), which may be overly optimistic. Moreover, dev_pm_qos_expose_flags() and dev_pm_qos_expose_latency_limit() leak memory in error code paths (req needs to be freed on errors) and __dev_pm_qos_drop_user_request() forgets to free the request. To fix the above issues put more things under the device PM QoS mutex to make them mutually exclusive and add the missing freeing of memory. Signed-off-by: Rafael J. Wysocki --- drivers/base/power/qos.c | 129 ++++++++++++++++++++++++++------------- 1 file changed, 87 insertions(+), 42 deletions(-) diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index 3d4d1f8aac5c..2159d62c858a 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -344,6 +344,13 @@ static int __dev_pm_qos_update_request(struct dev_pm_qos_request *req, s32 curr_value; int ret = 0; + if (!req) /*guard against callers passing in null */ + return -EINVAL; + + if (WARN(!dev_pm_qos_request_active(req), + "%s() called for unknown object\n", __func__)) + return -EINVAL; + if (!req->dev->power.qos) return -ENODEV; @@ -386,6 +393,17 @@ int dev_pm_qos_update_request(struct dev_pm_qos_request *req, s32 new_value) { int ret; + mutex_lock(&dev_pm_qos_mtx); + ret = __dev_pm_qos_update_request(req, new_value); + mutex_unlock(&dev_pm_qos_mtx); + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_qos_update_request); + +static int __dev_pm_qos_remove_request(struct dev_pm_qos_request *req) +{ + int ret = 0; + if (!req) /*guard against callers passing in null */ return -EINVAL; @@ -393,13 +411,15 @@ int dev_pm_qos_update_request(struct dev_pm_qos_request *req, s32 new_value) "%s() called for unknown object\n", __func__)) return -EINVAL; - mutex_lock(&dev_pm_qos_mtx); - ret = __dev_pm_qos_update_request(req, new_value); - mutex_unlock(&dev_pm_qos_mtx); - + if (req->dev->power.qos) { + ret = apply_constraint(req, PM_QOS_REMOVE_REQ, + PM_QOS_DEFAULT_VALUE); + memset(req, 0, sizeof(*req)); + } else { + ret = -ENODEV; + } return ret; } -EXPORT_SYMBOL_GPL(dev_pm_qos_update_request); /** * dev_pm_qos_remove_request - modifies an existing qos request @@ -418,26 +438,10 @@ EXPORT_SYMBOL_GPL(dev_pm_qos_update_request); */ int dev_pm_qos_remove_request(struct dev_pm_qos_request *req) { - int ret = 0; - - if (!req) /*guard against callers passing in null */ - return -EINVAL; - - if (WARN(!dev_pm_qos_request_active(req), - "%s() called for unknown object\n", __func__)) - return -EINVAL; + int ret; mutex_lock(&dev_pm_qos_mtx); - - if (req->dev->power.qos) { - ret = apply_constraint(req, PM_QOS_REMOVE_REQ, - PM_QOS_DEFAULT_VALUE); - memset(req, 0, sizeof(*req)); - } else { - /* Return if the device has been removed */ - ret = -ENODEV; - } - + ret = __dev_pm_qos_remove_request(req); mutex_unlock(&dev_pm_qos_mtx); return ret; } @@ -563,16 +567,20 @@ EXPORT_SYMBOL_GPL(dev_pm_qos_add_ancestor_request); static void __dev_pm_qos_drop_user_request(struct device *dev, enum dev_pm_qos_req_type type) { + struct dev_pm_qos_request *req = NULL; + switch(type) { case DEV_PM_QOS_LATENCY: - dev_pm_qos_remove_request(dev->power.qos->latency_req); + req = dev->power.qos->latency_req; 
dev->power.qos->latency_req = NULL; break; case DEV_PM_QOS_FLAGS: - dev_pm_qos_remove_request(dev->power.qos->flags_req); + req = dev->power.qos->flags_req; dev->power.qos->flags_req = NULL; break; } + __dev_pm_qos_remove_request(req); + kfree(req); } /** @@ -588,22 +596,36 @@ int dev_pm_qos_expose_latency_limit(struct device *dev, s32 value) if (!device_is_registered(dev) || value < 0) return -EINVAL; - if (dev->power.qos && dev->power.qos->latency_req) - return -EEXIST; - req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) return -ENOMEM; ret = dev_pm_qos_add_request(dev, req, DEV_PM_QOS_LATENCY, value); - if (ret < 0) + if (ret < 0) { + kfree(req); return ret; + } + + mutex_lock(&dev_pm_qos_mtx); + + if (!dev->power.qos) + ret = -ENODEV; + else if (dev->power.qos->latency_req) + ret = -EEXIST; + + if (ret < 0) { + __dev_pm_qos_remove_request(req); + kfree(req); + goto out; + } dev->power.qos->latency_req = req; ret = pm_qos_sysfs_add_latency(dev); if (ret) __dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_LATENCY); + out: + mutex_unlock(&dev_pm_qos_mtx); return ret; } EXPORT_SYMBOL_GPL(dev_pm_qos_expose_latency_limit); @@ -614,10 +636,14 @@ EXPORT_SYMBOL_GPL(dev_pm_qos_expose_latency_limit); */ void dev_pm_qos_hide_latency_limit(struct device *dev) { + mutex_lock(&dev_pm_qos_mtx); + if (dev->power.qos && dev->power.qos->latency_req) { pm_qos_sysfs_remove_latency(dev); __dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_LATENCY); } + + mutex_unlock(&dev_pm_qos_mtx); } EXPORT_SYMBOL_GPL(dev_pm_qos_hide_latency_limit); @@ -634,24 +660,37 @@ int dev_pm_qos_expose_flags(struct device *dev, s32 val) if (!device_is_registered(dev)) return -EINVAL; - if (dev->power.qos && dev->power.qos->flags_req) - return -EEXIST; - req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) return -ENOMEM; - pm_runtime_get_sync(dev); ret = dev_pm_qos_add_request(dev, req, DEV_PM_QOS_FLAGS, val); - if (ret < 0) - goto fail; + if (ret < 0) { + kfree(req); + return ret; + } + + pm_runtime_get_sync(dev); + mutex_lock(&dev_pm_qos_mtx); + + if (!dev->power.qos) + ret = -ENODEV; + else if (dev->power.qos->flags_req) + ret = -EEXIST; + + if (ret < 0) { + __dev_pm_qos_remove_request(req); + kfree(req); + goto out; + } dev->power.qos->flags_req = req; ret = pm_qos_sysfs_add_flags(dev); if (ret) __dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_FLAGS); -fail: + out: + mutex_unlock(&dev_pm_qos_mtx); pm_runtime_put(dev); return ret; } @@ -663,12 +702,16 @@ EXPORT_SYMBOL_GPL(dev_pm_qos_expose_flags); */ void dev_pm_qos_hide_flags(struct device *dev) { + pm_runtime_get_sync(dev); + mutex_lock(&dev_pm_qos_mtx); + if (dev->power.qos && dev->power.qos->flags_req) { pm_qos_sysfs_remove_flags(dev); - pm_runtime_get_sync(dev); __dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_FLAGS); - pm_runtime_put(dev); } + + mutex_unlock(&dev_pm_qos_mtx); + pm_runtime_put(dev); } EXPORT_SYMBOL_GPL(dev_pm_qos_hide_flags); @@ -683,12 +726,14 @@ int dev_pm_qos_update_flags(struct device *dev, s32 mask, bool set) s32 value; int ret; - if (!dev->power.qos || !dev->power.qos->flags_req) - return -EINVAL; - pm_runtime_get_sync(dev); mutex_lock(&dev_pm_qos_mtx); + if (!dev->power.qos || !dev->power.qos->flags_req) { + ret = -EINVAL; + goto out; + } + value = dev_pm_qos_requested_flags(dev); if (set) value |= mask; @@ -697,9 +742,9 @@ int dev_pm_qos_update_flags(struct device *dev, s32 mask, bool set) ret = __dev_pm_qos_update_request(dev->power.qos->flags_req, value); + out: mutex_unlock(&dev_pm_qos_mtx); pm_runtime_put(dev); - return ret; } #endif /* CONFIG_PM_RUNTIME */ 
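The fix above settles into one pattern for every expose-style helper: allocate and register the request first, then take dev_pm_qos_mtx before looking at dev->power.qos, and free the request on every failure path instead of leaving that to the caller. A condensed sketch of that shape, assuming the declarations already present in drivers/base/power/qos.c; the function name expose_example() is invented for illustration and is not part of the patch:

static int expose_example(struct device *dev, s32 value)
{
	struct dev_pm_qos_request *req;
	int ret;

	req = kzalloc(sizeof(*req), GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	ret = dev_pm_qos_add_request(dev, req, DEV_PM_QOS_LATENCY, value);
	if (ret < 0) {
		kfree(req);			/* nothing published yet, we still own it */
		return ret;
	}

	mutex_lock(&dev_pm_qos_mtx);		/* serialize against the hide/destroy paths */
	if (!dev->power.qos)
		ret = -ENODEV;			/* constraints object went away under us */
	else if (dev->power.qos->latency_req)
		ret = -EEXIST;			/* already exposed by someone else */

	if (ret < 0) {
		__dev_pm_qos_remove_request(req);
		kfree(req);			/* error path must not leak the request */
	} else {
		dev->power.qos->latency_req = req;	/* publish only under the mutex */
	}
	mutex_unlock(&dev_pm_qos_mtx);
	return ret;
}

The same ordering (allocate, add, lock, re-check, publish or free) is what the flags variant and the corresponding hide helpers rely on as well.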
From 37530f2bda039774bd65aea14cc1d1dd26a82b9e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 4 Mar 2013 14:22:57 +0100 Subject: [PATCH 134/261] PM / QoS: Remove device PM QoS sysfs attributes at the right place Device PM QoS sysfs attributes, if present during device removal, are removed from within device_pm_remove(), which is too late, since dpm_sysfs_remove() has already removed the whole attribute group they belonged to. However, moving the removal of those attributes to dpm_sysfs_remove() alone is not sufficient, because in theory they still can be re-added right after being removed by it (the device's driver is still bound to it at that point). For this reason, move the entire desctruction of device PM QoS constraints to dpm_sysfs_remove() and make it prevent any new constraints from being added after it has run. Also, move the initialization of the power.qos field in struct device to device_pm_init_common() and drop the no longer needed dev_pm_qos_constraints_init(). Reported-by: Sasha Levin Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 2 - drivers/base/power/power.h | 8 +-- drivers/base/power/qos.c | 120 ++++++++++++++++--------------------- drivers/base/power/sysfs.c | 1 + 4 files changed, 55 insertions(+), 76 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 2b7f77d3fcb0..15beb500a4e4 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -99,7 +99,6 @@ void device_pm_add(struct device *dev) dev_warn(dev, "parent %s should not be sleeping\n", dev_name(dev->parent)); list_add_tail(&dev->power.entry, &dpm_list); - dev_pm_qos_constraints_init(dev); mutex_unlock(&dpm_list_mtx); } @@ -113,7 +112,6 @@ void device_pm_remove(struct device *dev) dev->bus ? dev->bus->name : "No Bus", dev_name(dev)); complete_all(&dev->power.completion); mutex_lock(&dpm_list_mtx); - dev_pm_qos_constraints_destroy(dev); list_del_init(&dev->power.entry); mutex_unlock(&dpm_list_mtx); device_wakeup_disable(dev); diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index b16686a0a5a2..cfc3226ec492 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -4,7 +4,7 @@ static inline void device_pm_init_common(struct device *dev) { if (!dev->power.early_init) { spin_lock_init(&dev->power.lock); - dev->power.power_state = PMSG_INVALID; + dev->power.qos = NULL; dev->power.early_init = true; } } @@ -56,14 +56,10 @@ extern void device_pm_move_last(struct device *); static inline void device_pm_sleep_init(struct device *dev) {} -static inline void device_pm_add(struct device *dev) -{ - dev_pm_qos_constraints_init(dev); -} +static inline void device_pm_add(struct device *dev) {} static inline void device_pm_remove(struct device *dev) { - dev_pm_qos_constraints_destroy(dev); pm_runtime_remove(dev); } diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index 2159d62c858a..5f74587ef258 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "power.h" @@ -61,7 +62,7 @@ enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask) struct pm_qos_flags *pqf; s32 val; - if (!qos) + if (IS_ERR_OR_NULL(qos)) return PM_QOS_FLAGS_UNDEFINED; pqf = &qos->flags; @@ -101,7 +102,8 @@ EXPORT_SYMBOL_GPL(dev_pm_qos_flags); */ s32 __dev_pm_qos_read_value(struct device *dev) { - return dev->power.qos ? pm_qos_read_value(&dev->power.qos->latency) : 0; + return IS_ERR_OR_NULL(dev->power.qos) ? 
+ 0 : pm_qos_read_value(&dev->power.qos->latency); } /** @@ -198,20 +200,8 @@ static int dev_pm_qos_constraints_allocate(struct device *dev) return 0; } -/** - * dev_pm_qos_constraints_init - Initalize device's PM QoS constraints pointer. - * @dev: target device - * - * Called from the device PM subsystem during device insertion under - * device_pm_lock(). - */ -void dev_pm_qos_constraints_init(struct device *dev) -{ - mutex_lock(&dev_pm_qos_mtx); - dev->power.qos = NULL; - dev->power.power_state = PMSG_ON; - mutex_unlock(&dev_pm_qos_mtx); -} +static void __dev_pm_qos_hide_latency_limit(struct device *dev); +static void __dev_pm_qos_hide_flags(struct device *dev); /** * dev_pm_qos_constraints_destroy @@ -226,16 +216,15 @@ void dev_pm_qos_constraints_destroy(struct device *dev) struct pm_qos_constraints *c; struct pm_qos_flags *f; + mutex_lock(&dev_pm_qos_mtx); + /* * If the device's PM QoS resume latency limit or PM QoS flags have been * exposed to user space, they have to be hidden at this point. */ - dev_pm_qos_hide_latency_limit(dev); - dev_pm_qos_hide_flags(dev); + __dev_pm_qos_hide_latency_limit(dev); + __dev_pm_qos_hide_flags(dev); - mutex_lock(&dev_pm_qos_mtx); - - dev->power.power_state = PMSG_INVALID; qos = dev->power.qos; if (!qos) goto out; @@ -257,7 +246,7 @@ void dev_pm_qos_constraints_destroy(struct device *dev) } spin_lock_irq(&dev->power.lock); - dev->power.qos = NULL; + dev->power.qos = ERR_PTR(-ENODEV); spin_unlock_irq(&dev->power.lock); kfree(c->notifiers); @@ -301,32 +290,19 @@ int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, "%s() called for already added request\n", __func__)) return -EINVAL; - req->dev = dev; - mutex_lock(&dev_pm_qos_mtx); - if (!dev->power.qos) { - if (dev->power.power_state.event == PM_EVENT_INVALID) { - /* The device has been removed from the system. */ - req->dev = NULL; - ret = -ENODEV; - goto out; - } else { - /* - * Allocate the constraints data on the first call to - * add_request, i.e. only if the data is not already - * allocated and if the device has not been removed. 
- */ - ret = dev_pm_qos_constraints_allocate(dev); - } - } + if (IS_ERR(dev->power.qos)) + ret = -ENODEV; + else if (!dev->power.qos) + ret = dev_pm_qos_constraints_allocate(dev); if (!ret) { + req->dev = dev; req->type = type; ret = apply_constraint(req, PM_QOS_ADD_REQ, value); } - out: mutex_unlock(&dev_pm_qos_mtx); return ret; @@ -351,7 +327,7 @@ static int __dev_pm_qos_update_request(struct dev_pm_qos_request *req, "%s() called for unknown object\n", __func__)) return -EINVAL; - if (!req->dev->power.qos) + if (IS_ERR_OR_NULL(req->dev->power.qos)) return -ENODEV; switch(req->type) { @@ -402,7 +378,7 @@ EXPORT_SYMBOL_GPL(dev_pm_qos_update_request); static int __dev_pm_qos_remove_request(struct dev_pm_qos_request *req) { - int ret = 0; + int ret; if (!req) /*guard against callers passing in null */ return -EINVAL; @@ -411,13 +387,11 @@ static int __dev_pm_qos_remove_request(struct dev_pm_qos_request *req) "%s() called for unknown object\n", __func__)) return -EINVAL; - if (req->dev->power.qos) { - ret = apply_constraint(req, PM_QOS_REMOVE_REQ, - PM_QOS_DEFAULT_VALUE); - memset(req, 0, sizeof(*req)); - } else { - ret = -ENODEV; - } + if (IS_ERR_OR_NULL(req->dev->power.qos)) + return -ENODEV; + + ret = apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); + memset(req, 0, sizeof(*req)); return ret; } @@ -466,9 +440,10 @@ int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier) mutex_lock(&dev_pm_qos_mtx); - if (!dev->power.qos) - ret = dev->power.power_state.event != PM_EVENT_INVALID ? - dev_pm_qos_constraints_allocate(dev) : -ENODEV; + if (IS_ERR(dev->power.qos)) + ret = -ENODEV; + else if (!dev->power.qos) + ret = dev_pm_qos_constraints_allocate(dev); if (!ret) ret = blocking_notifier_chain_register( @@ -497,7 +472,7 @@ int dev_pm_qos_remove_notifier(struct device *dev, mutex_lock(&dev_pm_qos_mtx); /* Silently return if the constraints object is not present. */ - if (dev->power.qos) + if (!IS_ERR_OR_NULL(dev->power.qos)) retval = blocking_notifier_chain_unregister( dev->power.qos->latency.notifiers, notifier); @@ -608,7 +583,7 @@ int dev_pm_qos_expose_latency_limit(struct device *dev, s32 value) mutex_lock(&dev_pm_qos_mtx); - if (!dev->power.qos) + if (IS_ERR_OR_NULL(dev->power.qos)) ret = -ENODEV; else if (dev->power.qos->latency_req) ret = -EEXIST; @@ -630,6 +605,14 @@ int dev_pm_qos_expose_latency_limit(struct device *dev, s32 value) } EXPORT_SYMBOL_GPL(dev_pm_qos_expose_latency_limit); +static void __dev_pm_qos_hide_latency_limit(struct device *dev) +{ + if (!IS_ERR_OR_NULL(dev->power.qos) && dev->power.qos->latency_req) { + pm_qos_sysfs_remove_latency(dev); + __dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_LATENCY); + } +} + /** * dev_pm_qos_hide_latency_limit - Hide PM QoS latency limit from user space. * @dev: Device whose PM QoS latency limit is to be hidden from user space. 
@@ -637,12 +620,7 @@ EXPORT_SYMBOL_GPL(dev_pm_qos_expose_latency_limit); void dev_pm_qos_hide_latency_limit(struct device *dev) { mutex_lock(&dev_pm_qos_mtx); - - if (dev->power.qos && dev->power.qos->latency_req) { - pm_qos_sysfs_remove_latency(dev); - __dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_LATENCY); - } - + __dev_pm_qos_hide_latency_limit(dev); mutex_unlock(&dev_pm_qos_mtx); } EXPORT_SYMBOL_GPL(dev_pm_qos_hide_latency_limit); @@ -673,7 +651,7 @@ int dev_pm_qos_expose_flags(struct device *dev, s32 val) pm_runtime_get_sync(dev); mutex_lock(&dev_pm_qos_mtx); - if (!dev->power.qos) + if (IS_ERR_OR_NULL(dev->power.qos)) ret = -ENODEV; else if (dev->power.qos->flags_req) ret = -EEXIST; @@ -696,6 +674,14 @@ int dev_pm_qos_expose_flags(struct device *dev, s32 val) } EXPORT_SYMBOL_GPL(dev_pm_qos_expose_flags); +static void __dev_pm_qos_hide_flags(struct device *dev) +{ + if (!IS_ERR_OR_NULL(dev->power.qos) && dev->power.qos->flags_req) { + pm_qos_sysfs_remove_flags(dev); + __dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_FLAGS); + } +} + /** * dev_pm_qos_hide_flags - Hide PM QoS flags of a device from user space. * @dev: Device whose PM QoS flags are to be hidden from user space. @@ -704,12 +690,7 @@ void dev_pm_qos_hide_flags(struct device *dev) { pm_runtime_get_sync(dev); mutex_lock(&dev_pm_qos_mtx); - - if (dev->power.qos && dev->power.qos->flags_req) { - pm_qos_sysfs_remove_flags(dev); - __dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_FLAGS); - } - + __dev_pm_qos_hide_flags(dev); mutex_unlock(&dev_pm_qos_mtx); pm_runtime_put(dev); } @@ -729,7 +710,7 @@ int dev_pm_qos_update_flags(struct device *dev, s32 mask, bool set) pm_runtime_get_sync(dev); mutex_lock(&dev_pm_qos_mtx); - if (!dev->power.qos || !dev->power.qos->flags_req) { + if (IS_ERR_OR_NULL(dev->power.qos) || !dev->power.qos->flags_req) { ret = -EINVAL; goto out; } @@ -747,4 +728,7 @@ int dev_pm_qos_update_flags(struct device *dev, s32 mask, bool set) pm_runtime_put(dev); return ret; } +#else /* !CONFIG_PM_RUNTIME */ +static void __dev_pm_qos_hide_latency_limit(struct device *dev) {} +static void __dev_pm_qos_hide_flags(struct device *dev) {} #endif /* CONFIG_PM_RUNTIME */ diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 50d16e3cb0a9..a53ebd265701 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -708,6 +708,7 @@ void rpm_sysfs_remove(struct device *dev) void dpm_sysfs_remove(struct device *dev) { + dev_pm_qos_constraints_destroy(dev); rpm_sysfs_remove(dev); sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group); sysfs_remove_group(&dev->kobj, &pm_attr_group); From ed4cf5b23f9d21c441e5c8feead86f2e4a436923 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 22 Feb 2013 07:37:36 +0000 Subject: [PATCH 135/261] ACPI / Sleep: Avoid interleaved message on errors Got this dmesg log on an Acer Aspire 725. [ 0.256351] ACPI: (supports S0ACPI Exception: AE_NOT_FOUND, While evaluating Sleep State [\_S1_] (20130117/hwxface-568) [ 0.256373] ACPI Exception: AE_NOT_FOUND, While evaluating Sleep State [\_S2_] (20130117/hwxface-568) [ 0.256391] S3 S4 S5) Avoid this interleaving error messages. Signed-off-by: Joe Perches Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/sleep.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 6d3a06a629a1..24213033fbae 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -599,7 +599,6 @@ static void acpi_sleep_suspend_setup(void) status = acpi_get_sleep_type_data(i, &type_a, &type_b); if (ACPI_SUCCESS(status)) { sleep_states[i] = 1; - pr_cont(" S%d", i); } } @@ -742,7 +741,6 @@ static void acpi_sleep_hibernate_setup(void) hibernation_set_ops(old_suspend_ordering ? &acpi_hibernation_ops_old : &acpi_hibernation_ops); sleep_states[ACPI_STATE_S4] = 1; - pr_cont(KERN_CONT " S4"); if (nosigcheck) return; @@ -788,6 +786,9 @@ int __init acpi_sleep_init(void) { acpi_status status; u8 type_a, type_b; + char supported[ACPI_S_STATE_COUNT * 3 + 1]; + char *pos = supported; + int i; if (acpi_disabled) return 0; @@ -795,7 +796,6 @@ int __init acpi_sleep_init(void) acpi_sleep_dmi_check(); sleep_states[ACPI_STATE_S0] = 1; - pr_info(PREFIX "(supports S0"); acpi_sleep_suspend_setup(); acpi_sleep_hibernate_setup(); @@ -803,11 +803,17 @@ int __init acpi_sleep_init(void) status = acpi_get_sleep_type_data(ACPI_STATE_S5, &type_a, &type_b); if (ACPI_SUCCESS(status)) { sleep_states[ACPI_STATE_S5] = 1; - pr_cont(" S5"); pm_power_off_prepare = acpi_power_off_prepare; pm_power_off = acpi_power_off; } - pr_cont(")\n"); + + supported[0] = 0; + for (i = 0; i < ACPI_S_STATE_COUNT; i++) { + if (sleep_states[i]) + pos += sprintf(pos, " S%d", i); + } + pr_info(PREFIX "(supports%s)\n", supported); + /* * Register the tts_notifier to reboot notifier list so that the _TTS * object can also be evaluated when the system enters S5. From 5273a258373a84bbbcbccabb356de5b68e2b8931 Mon Sep 17 00:00:00 2001 From: Syam Sidhardhan Date: Sun, 24 Feb 2013 23:12:53 +0000 Subject: [PATCH 136/261] ACPI / processor: Remove redundant NULL check before kfree kfree() on a NULL pointer is a no-op, so remove a redundant NULL pointer check in map_mat_entry(). [rjw: Changelog] Signed-off-by: Syam Sidhardhan Signed-off-by: Rafael J. Wysocki --- drivers/acpi/processor_core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index eff722278ff5..164d49569aeb 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -158,8 +158,7 @@ static int map_mat_entry(acpi_handle handle, int type, u32 acpi_id) } exit: - if (buffer.pointer) - kfree(buffer.pointer); + kfree(buffer.pointer); return apic_id; } From 9b27516fcd7ab7dc416edf418446c24c61729938 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Wed, 27 Feb 2013 04:27:30 +0000 Subject: [PATCH 137/261] ACPI / porocessor: Beautify code, pr->id is u32 which is never < 0 pr->id is u32 which never < 0, so remove the redundant pr->id < 0 check from acpi_processor_add(). [rjw: Changelog] Signed-off-by: Chen Gang Signed-off-by: Rafael J. Wysocki --- drivers/acpi/processor_driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index df34bd04ae62..bec717ffd25f 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -559,7 +559,7 @@ static int __cpuinit acpi_processor_add(struct acpi_device *device) return 0; #endif - BUG_ON((pr->id >= nr_cpu_ids) || (pr->id < 0)); + BUG_ON(pr->id >= nr_cpu_ids); /* * Buggy BIOS check From 53540098b23c3884b4a0b4f220b9d977bc496af3 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Sun, 3 Mar 2013 22:35:20 +0100 Subject: [PATCH 138/261] ACPI / glue: Add .match() callback to struct acpi_bus_type USB uses the .find_bridge() callback from struct acpi_bus_type incorrectly, because as a result of the way it is used by USB every device in the system that doesn't have a bus type or parent is passed to usb_acpi_find_device() for inspection. What USB actually needs, though, is to call usb_acpi_find_device() for USB ports that don't have a bus type defined, but have usb_port_device_type as their device type, as well as for USB devices. To fix that replace the struct bus_type pointer in struct acpi_bus_type used for matching devices to specific subsystems with a .match() callback to be used for this purpose and update the users of struct acpi_bus_type, including USB, accordingly. Define the .match() callback routine for USB, usb_acpi_bus_match(), in such a way that it will cover both USB devices and USB ports and remove the now redundant .find_bridge() callback pointer from usb_acpi_bus. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman Acked-by: Yinghai Lu Acked-by: Jeff Garzik --- drivers/acpi/glue.c | 39 +++++++++++++------------------------ drivers/ata/libata-acpi.c | 1 + drivers/pci/pci-acpi.c | 8 +++++++- drivers/pnp/pnpacpi/core.c | 8 +++++++- drivers/scsi/scsi_lib.c | 7 ++++++- drivers/usb/core/usb-acpi.c | 9 +++++++-- include/acpi/acpi_bus.h | 3 ++- 7 files changed, 43 insertions(+), 32 deletions(-) diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index ef6f155469b5..b94d14721af3 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -36,12 +36,11 @@ int register_acpi_bus_type(struct acpi_bus_type *type) { if (acpi_disabled) return -ENODEV; - if (type && type->bus && type->find_device) { + if (type && type->match && type->find_device) { down_write(&bus_type_sem); list_add_tail(&type->list, &bus_type_list); up_write(&bus_type_sem); - printk(KERN_INFO PREFIX "bus type %s registered\n", - type->bus->name); + printk(KERN_INFO PREFIX "bus type %s registered\n", type->name); return 0; } return -ENODEV; @@ -56,24 +55,21 @@ int unregister_acpi_bus_type(struct acpi_bus_type *type) down_write(&bus_type_sem); list_del_init(&type->list); up_write(&bus_type_sem); - printk(KERN_INFO PREFIX "ACPI bus type %s unregistered\n", - type->bus->name); + printk(KERN_INFO PREFIX "bus type %s unregistered\n", + type->name); return 0; } return -ENODEV; } EXPORT_SYMBOL_GPL(unregister_acpi_bus_type); -static struct acpi_bus_type *acpi_get_bus_type(struct bus_type *type) +static struct acpi_bus_type *acpi_get_bus_type(struct device *dev) { struct acpi_bus_type *tmp, *ret = NULL; - if (!type) - return NULL; - down_read(&bus_type_sem); list_for_each_entry(tmp, &bus_type_list, list) { - if (tmp->bus == type) { + if (tmp->match(dev)) { ret = tmp; break; } @@ -261,26 +257,17 @@ err: static int acpi_platform_notify(struct device *dev) { - struct acpi_bus_type *type; + struct acpi_bus_type *type = acpi_get_bus_type(dev); acpi_handle handle; int ret; ret = acpi_bind_one(dev, NULL); - if (ret && (!dev->bus || !dev->parent)) { - /* bridge devices genernally haven't bus or parent */ - ret = acpi_find_bridge_device(dev, &handle); - if (!ret) { - ret = acpi_bind_one(dev, handle); - if (ret) - goto out; - } - } - - type = acpi_get_bus_type(dev->bus); if (ret) { - if (!type || !type->find_device) { - DBG("No ACPI bus support for %s\n", dev_name(dev)); - ret = -EINVAL; + if (!type) { + ret = acpi_find_bridge_device(dev, &handle); + if (!ret) + ret = acpi_bind_one(dev, 
handle); + goto out; } @@ -316,7 +303,7 @@ static int acpi_platform_notify_remove(struct device *dev) { struct acpi_bus_type *type; - type = acpi_get_bus_type(dev->bus); + type = acpi_get_bus_type(dev); if (type && type->cleanup) type->cleanup(dev); diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c index 0ea1018280bd..c832a5ca09ad 100644 --- a/drivers/ata/libata-acpi.c +++ b/drivers/ata/libata-acpi.c @@ -1150,6 +1150,7 @@ static int ata_acpi_find_dummy(struct device *dev, acpi_handle *handle) } static struct acpi_bus_type ata_acpi_bus = { + .name = "ATA", .find_bridge = ata_acpi_find_dummy, .find_device = ata_acpi_find_device, }; diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 39c937f9b426..dee5dddaa292 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -331,8 +331,14 @@ static void pci_acpi_cleanup(struct device *dev) } } +static bool pci_acpi_bus_match(struct device *dev) +{ + return dev->bus == &pci_bus_type; +} + static struct acpi_bus_type acpi_pci_bus = { - .bus = &pci_bus_type, + .name = "PCI", + .match = pci_acpi_bus_match, .find_device = acpi_pci_find_device, .setup = pci_acpi_setup, .cleanup = pci_acpi_cleanup, diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c index 8813fc03aa09..55cd459a3908 100644 --- a/drivers/pnp/pnpacpi/core.c +++ b/drivers/pnp/pnpacpi/core.c @@ -353,8 +353,14 @@ static int __init acpi_pnp_find_device(struct device *dev, acpi_handle * handle) /* complete initialization of a PNPACPI device includes having * pnpdev->dev.archdata.acpi_handle point to its ACPI sibling. */ +static bool acpi_pnp_bus_match(struct device *dev) +{ + return dev->bus == &pnp_bus_type; +} + static struct acpi_bus_type __initdata acpi_pnp_bus = { - .bus = &pnp_bus_type, + .name = "PNP", + .match = acpi_pnp_bus_match, .find_device = acpi_pnp_find_device, }; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 765398c063c7..c31187d79343 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -71,9 +71,14 @@ struct kmem_cache *scsi_sdb_cache; #ifdef CONFIG_ACPI #include +static bool acpi_scsi_bus_match(struct device *dev) +{ + return dev->bus == &scsi_bus_type; +} + int scsi_register_acpi_bus_type(struct acpi_bus_type *bus) { - bus->bus = &scsi_bus_type; + bus->match = acpi_scsi_bus_match; return register_acpi_bus_type(bus); } EXPORT_SYMBOL_GPL(scsi_register_acpi_bus_type); diff --git a/drivers/usb/core/usb-acpi.c b/drivers/usb/core/usb-acpi.c index cef4252bb31a..b6f4bad3f756 100644 --- a/drivers/usb/core/usb-acpi.c +++ b/drivers/usb/core/usb-acpi.c @@ -210,9 +210,14 @@ static int usb_acpi_find_device(struct device *dev, acpi_handle *handle) return 0; } +static bool usb_acpi_bus_match(struct device *dev) +{ + return is_usb_device(dev) || is_usb_port(dev); +} + static struct acpi_bus_type usb_acpi_bus = { - .bus = &usb_bus_type, - .find_bridge = usb_acpi_find_device, + .name = "USB", + .match = usb_acpi_bus_match, .find_device = usb_acpi_find_device, }; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index e65278f560c4..c751d7de3a5f 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -437,7 +437,8 @@ void acpi_remove_dir(struct acpi_device *); */ struct acpi_bus_type { struct list_head list; - struct bus_type *bus; + const char *name; + bool (*match)(struct device *dev); /* For general devices under the bus */ int (*find_device) (struct device *, acpi_handle *); /* For bridges, such as PCI root bridge, IDE controller */ From 
924144818cf0edc5d9d70d3a44e7cbbf4544796c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 3 Mar 2013 22:35:44 +0100 Subject: [PATCH 139/261] ACPI / glue: Drop .find_bridge() callback from struct acpi_bus_type After PCI and USB have stopped using the .find_bridge() callback in struct acpi_bus_type, the only remaining user of it is SATA, but SATA only pretends to be a user, because it points that callback to a stub always returning -ENODEV. For this reason, drop the SATA's dummy .find_bridge() callback and remove .find_bridge(), which is not used any more, from struct acpi_bus_type entirely. Signed-off-by: Rafael J. Wysocki Acked-by: Yinghai Lu Acked-by: Jeff Garzik --- drivers/acpi/glue.c | 26 +------------------------- drivers/ata/libata-acpi.c | 6 ------ include/acpi/acpi_bus.h | 3 --- 3 files changed, 1 insertion(+), 34 deletions(-) diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index b94d14721af3..40a84cc6740c 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -78,22 +78,6 @@ static struct acpi_bus_type *acpi_get_bus_type(struct device *dev) return ret; } -static int acpi_find_bridge_device(struct device *dev, acpi_handle * handle) -{ - struct acpi_bus_type *tmp; - int ret = -ENODEV; - - down_read(&bus_type_sem); - list_for_each_entry(tmp, &bus_type_list, list) { - if (tmp->find_bridge && !tmp->find_bridge(dev, handle)) { - ret = 0; - break; - } - } - up_read(&bus_type_sem); - return ret; -} - static acpi_status do_acpi_find_child(acpi_handle handle, u32 lvl_not_used, void *addr_p, void **ret_p) { @@ -262,15 +246,7 @@ static int acpi_platform_notify(struct device *dev) int ret; ret = acpi_bind_one(dev, NULL); - if (ret) { - if (!type) { - ret = acpi_find_bridge_device(dev, &handle); - if (!ret) - ret = acpi_bind_one(dev, handle); - - goto out; - } - + if (ret && type) { ret = type->find_device(dev, &handle); if (ret) { DBG("Unable to get handle for %s\n", dev_name(dev)); diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c index c832a5ca09ad..beea3115577e 100644 --- a/drivers/ata/libata-acpi.c +++ b/drivers/ata/libata-acpi.c @@ -1144,14 +1144,8 @@ static int ata_acpi_find_device(struct device *dev, acpi_handle *handle) return -ENODEV; } -static int ata_acpi_find_dummy(struct device *dev, acpi_handle *handle) -{ - return -ENODEV; -} - static struct acpi_bus_type ata_acpi_bus = { .name = "ATA", - .find_bridge = ata_acpi_find_dummy, .find_device = ata_acpi_find_device, }; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index c751d7de3a5f..22ba56e834e2 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -439,10 +439,7 @@ struct acpi_bus_type { struct list_head list; const char *name; bool (*match)(struct device *dev); - /* For general devices under the bus */ int (*find_device) (struct device *, acpi_handle *); - /* For bridges, such as PCI root bridge, IDE controller */ - int (*find_bridge) (struct device *, acpi_handle *); void (*setup)(struct device *); void (*cleanup)(struct device *); }; From 85c50a5899b23f4f893b0898b286023157b98376 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 4 Mar 2013 15:19:18 +0300 Subject: [PATCH 140/261] ALSA: seq: seq_oss_event: missing range checks The "dev" variable could be out of bounds. Calling snd_seq_oss_synth_is_valid() checks that it is is a valid device which has been opened. We check this inside set_note_event() so this function can't succeed without a valid "dev". But we need to do the check earlier to prevent invalid dereferences and memory corruption. 
One call tree where "dev" could be out of bounds is: -> snd_seq_oss_oob_user() -> snd_seq_oss_process_event() -> extended_event() -> note_on_event() Signed-off-by: Dan Carpenter Signed-off-by: Takashi Iwai --- sound/core/seq/oss/seq_oss_event.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/sound/core/seq/oss/seq_oss_event.c b/sound/core/seq/oss/seq_oss_event.c index 066f5f3e3f4c..c3908862bc8b 100644 --- a/sound/core/seq/oss/seq_oss_event.c +++ b/sound/core/seq/oss/seq_oss_event.c @@ -285,7 +285,12 @@ local_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev static int note_on_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, struct snd_seq_event *ev) { - struct seq_oss_synthinfo *info = &dp->synths[dev]; + struct seq_oss_synthinfo *info; + + if (!snd_seq_oss_synth_is_valid(dp, dev)) + return -ENXIO; + + info = &dp->synths[dev]; switch (info->arg.event_passing) { case SNDRV_SEQ_OSS_PROCESS_EVENTS: if (! info->ch || ch < 0 || ch >= info->nr_voices) { @@ -340,7 +345,12 @@ note_on_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, st static int note_off_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, struct snd_seq_event *ev) { - struct seq_oss_synthinfo *info = &dp->synths[dev]; + struct seq_oss_synthinfo *info; + + if (!snd_seq_oss_synth_is_valid(dp, dev)) + return -ENXIO; + + info = &dp->synths[dev]; switch (info->arg.event_passing) { case SNDRV_SEQ_OSS_PROCESS_EVENTS: if (! info->ch || ch < 0 || ch >= info->nr_voices) { From 5a114b98661e3aaa0ac085eb931584dce3b0ef9b Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 4 Mar 2013 11:19:09 -0500 Subject: [PATCH 141/261] tile: work around bug in the generic sys_llseek sys_llseek should specify the high and low 32-bit seek values as "unsigned int" but instead it specifies "unsigned long". Since compat syscall arguments are always sign-extended on tile, this means that a seek value of 0xffffffff will be incorrectly interpreted as a value of -1ULL. To avoid the risk of breaking binary compatibility on architectures that already use sys_llseek this way, we follow the same path as MIPS and provide a wrapper override. Signed-off-by: Chris Metcalf Cc: stable@kernel.org [v3.6 onwards] --- arch/tile/include/asm/compat.h | 3 +++ arch/tile/kernel/compat.c | 13 +++++++++++++ 2 files changed, 16 insertions(+) diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h index 001d418a8957..78f1f2ded86c 100644 --- a/arch/tile/include/asm/compat.h +++ b/arch/tile/include/asm/compat.h @@ -288,6 +288,9 @@ long compat_sys_sync_file_range2(int fd, unsigned int flags, long compat_sys_fallocate(int fd, int mode, u32 offset_lo, u32 offset_hi, u32 len_lo, u32 len_hi); +long compat_sys_llseek(unsigned int fd, unsigned int offset_high, + unsigned int offset_low, loff_t __user * result, + unsigned int origin); /* Assembly trampoline to avoid clobbering r0. */ long _compat_sys_rt_sigreturn(void); diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c index 7f72401b4f45..69034e215742 100644 --- a/arch/tile/kernel/compat.c +++ b/arch/tile/kernel/compat.c @@ -76,6 +76,18 @@ long compat_sys_fallocate(int fd, int mode, ((loff_t)len_hi << 32) | len_lo); } +/* + * Avoid bug in generic sys_llseek() that specifies offset_high and + * offset_low as "unsigned long", thus making it possible to pass + * a sign-extended high 32 bits in offset_low. 
+ */ +long compat_sys_llseek(unsigned int fd, unsigned int offset_high, + unsigned int offset_low, loff_t __user * result, + unsigned int origin) +{ + return sys_llseek(fd, offset_high, offset_low, result, origin); +} + /* Provide the compat syscall number to call mapping. */ #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = (call), @@ -83,6 +95,7 @@ long compat_sys_fallocate(int fd, int mode, /* See comments in sys.c */ #define compat_sys_fadvise64_64 sys32_fadvise64_64 #define compat_sys_readahead sys32_readahead +#define sys_llseek compat_sys_llseek /* Call the assembly trampolines where necessary. */ #define compat_sys_rt_sigreturn _compat_sys_rt_sigreturn From 61bc95c1fbbb6a08b55bbe161fdf1ea5493fc595 Mon Sep 17 00:00:00 2001 From: Egbert Eich Date: Mon, 4 Mar 2013 09:24:38 -0500 Subject: [PATCH 142/261] DRM/i915: On G45 enable cursor plane briefly after enabling the display plane. On G45 some low res modes (800x600 and 1024x768) produce a blank screen when the display plane is enabled with with cursor plane off. Experiments showed that this issue occurred when the following conditions were met: a. a previous mode had the cursor plane enabled (Xserver). b. this mode or the previous one was using self refresh. (Thus the problem was only seen with low res modes). The screens lit up as soon as the cursor plane got enabled. Therefore the blank screen occurred only in console mode, not when running an Xserver. It also seemed to be necessary to disable self refresh while briefly enabling the cursor plane. Signed-off-by: Egbert Eich Bugzilla: https://bugs.freedesktop.org/attachment.cgi?bugid=61457 Acked-by: Chris Wilson [danvet: drop spurious whitespace change.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 09659ff6d24d..0a02e044b653 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3604,6 +3604,30 @@ static void intel_crtc_dpms_overlay(struct intel_crtc *intel_crtc, bool enable) */ } +/** + * i9xx_fixup_plane - ugly workaround for G45 to fire up the hardware + * cursor plane briefly if not already running after enabling the display + * plane. + * This workaround avoids occasional blank screens when self refresh is + * enabled. 
+ */ +static void +g4x_fixup_plane(struct drm_i915_private *dev_priv, enum pipe pipe) +{ + u32 cntl = I915_READ(CURCNTR(pipe)); + + if ((cntl & CURSOR_MODE) == 0) { + u32 fw_bcl_self = I915_READ(FW_BLC_SELF); + + I915_WRITE(FW_BLC_SELF, fw_bcl_self & ~FW_BLC_SELF_EN); + I915_WRITE(CURCNTR(pipe), CURSOR_MODE_64_ARGB_AX); + intel_wait_for_vblank(dev_priv->dev, pipe); + I915_WRITE(CURCNTR(pipe), cntl); + I915_WRITE(CURBASE(pipe), I915_READ(CURBASE(pipe))); + I915_WRITE(FW_BLC_SELF, fw_bcl_self); + } +} + static void i9xx_crtc_enable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; @@ -3629,6 +3653,8 @@ static void i9xx_crtc_enable(struct drm_crtc *crtc) intel_enable_pipe(dev_priv, pipe, false); intel_enable_plane(dev_priv, plane, pipe); + if (IS_G4X(dev)) + g4x_fixup_plane(dev_priv, pipe); intel_crtc_load_lut(crtc); intel_update_fbc(dev); From b980955236922ae6106774511c5c05003d3ad225 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 4 Mar 2013 11:59:12 -0500 Subject: [PATCH 143/261] random: fix locking dependency with the tasklist_lock Commit 6133705494bb introduced a circular lock dependency because posix_cpu_timers_exit() is called by release_task(), which is holding a writer lock on tasklist_lock, and this can cause a deadlock since kill_fasync() gets called with nonblocking_pool.lock taken. There's no reason why kill_fasync() needs to be taken while the random pool is locked, so move it out to fix this locking dependency. Signed-off-by: "Theodore Ts'o" Reported-by: Russ Dill Cc: stable@kernel.org --- drivers/char/random.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 85e81ec1451e..57d4b152267c 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -852,6 +852,7 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, int reserved) { unsigned long flags; + int wakeup_write = 0; /* Hold lock while accounting */ spin_lock_irqsave(&r->lock, flags); @@ -873,10 +874,8 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, else r->entropy_count = reserved; - if (r->entropy_count < random_write_wakeup_thresh) { - wake_up_interruptible(&random_write_wait); - kill_fasync(&fasync, SIGIO, POLL_OUT); - } + if (r->entropy_count < random_write_wakeup_thresh) + wakeup_write = 1; } DEBUG_ENT("debiting %zu entropy credits from %s%s\n", @@ -884,6 +883,11 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, spin_unlock_irqrestore(&r->lock, flags); + if (wakeup_write) { + wake_up_interruptible(&random_write_wait); + kill_fasync(&fasync, SIGIO, POLL_OUT); + } + return nbytes; } From 87c319a2c3c2efd397281089b9cdce3050febeff Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 4 Mar 2013 13:37:32 -0500 Subject: [PATCH 144/261] tile: properly use COMPAT_SYSCALL_DEFINEx This was pointed out by Al Viro. Using the correct wrappers properly does sign extension as necessary on syscall arguments. Signed-off-by: Chris Metcalf --- arch/tile/kernel/compat.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c index 69034e215742..6ea4cdb3c6a0 100644 --- a/arch/tile/kernel/compat.c +++ b/arch/tile/kernel/compat.c @@ -32,45 +32,48 @@ * adapt the usual convention. 
*/ -long compat_sys_truncate64(char __user *filename, u32 dummy, u32 low, u32 high) +COMPAT_SYSCALL_DEFINE4(truncate64, char __user *, filename, u32, dummy, + u32, low, u32, high) { return sys_truncate(filename, ((loff_t)high << 32) | low); } -long compat_sys_ftruncate64(unsigned int fd, u32 dummy, u32 low, u32 high) +COMPAT_SYSCALL_DEFINE4(ftruncate64, unsigned int, fd, u32, dummy, + u32, low, u32, high) { return sys_ftruncate(fd, ((loff_t)high << 32) | low); } -long compat_sys_pread64(unsigned int fd, char __user *ubuf, size_t count, - u32 dummy, u32 low, u32 high) +COMPAT_SYSCALL_DEFINE6(pread64, unsigned int, fd, char __user *, ubuf, + size_t, count, u32, dummy, u32, low, u32, high) { return sys_pread64(fd, ubuf, count, ((loff_t)high << 32) | low); } -long compat_sys_pwrite64(unsigned int fd, char __user *ubuf, size_t count, - u32 dummy, u32 low, u32 high) +COMPAT_SYSCALL_DEFINE6(pwrite64, unsigned int, fd, char __user *, ubuf, + size_t, count, u32, dummy, u32, low, u32, high) { return sys_pwrite64(fd, ubuf, count, ((loff_t)high << 32) | low); } -long compat_sys_lookup_dcookie(u32 low, u32 high, char __user *buf, size_t len) +COMPAT_SYSCALL_DEFINE4(lookup_dcookie, u32, low, u32, high, + char __user *, buf, size_t, len) { return sys_lookup_dcookie(((loff_t)high << 32) | low, buf, len); } -long compat_sys_sync_file_range2(int fd, unsigned int flags, - u32 offset_lo, u32 offset_hi, - u32 nbytes_lo, u32 nbytes_hi) +COMPAT_SYSCALL_DEFINE6(sync_file_range2, int, fd, unsigned int, flags, + u32, offset_lo, u32, offset_hi, + u32, nbytes_lo, u32, nbytes_hi) { return sys_sync_file_range(fd, ((loff_t)offset_hi << 32) | offset_lo, ((loff_t)nbytes_hi << 32) | nbytes_lo, flags); } -long compat_sys_fallocate(int fd, int mode, - u32 offset_lo, u32 offset_hi, - u32 len_lo, u32 len_hi) +COMPAT_SYSCALL_DEFINE6(fallocate, int, fd, int, mode, + u32, offset_lo, u32, offset_hi, + u32, len_lo, u32, len_hi) { return sys_fallocate(fd, mode, ((loff_t)offset_hi << 32) | offset_lo, ((loff_t)len_hi << 32) | len_lo); @@ -81,9 +84,9 @@ long compat_sys_fallocate(int fd, int mode, * offset_low as "unsigned long", thus making it possible to pass * a sign-extended high 32 bits in offset_low. */ -long compat_sys_llseek(unsigned int fd, unsigned int offset_high, - unsigned int offset_low, loff_t __user * result, - unsigned int origin) +COMPAT_SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned int, offset_high, + unsigned int, offset_low, loff_t __user *, result, + unsigned int, origin) { return sys_llseek(fd, offset_high, offset_low, result, origin); } From ece6b0a2b25652d684a7ced4ae680a863af041e0 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 3 Mar 2013 16:18:11 +0000 Subject: [PATCH 145/261] rds: limit the size allocated by rds_message_alloc() Dave Jones reported the following bug: "When fed mangled socket data, rds will trust what userspace gives it, and tries to allocate enormous amounts of memory larger than what kmalloc can satisfy." WARNING: at mm/page_alloc.c:2393 __alloc_pages_nodemask+0xa0d/0xbe0() Hardware name: GA-MA78GM-S2H Modules linked in: vmw_vsock_vmci_transport vmw_vmci vsock fuse bnep dlci bridge 8021q garp stp mrp binfmt_misc l2tp_ppp l2tp_core rfcomm s Pid: 24652, comm: trinity-child2 Not tainted 3.8.0+ #65 Call Trace: [] warn_slowpath_common+0x75/0xa0 [] warn_slowpath_null+0x1a/0x20 [] __alloc_pages_nodemask+0xa0d/0xbe0 [] ? native_sched_clock+0x26/0x90 [] ? trace_hardirqs_off_caller+0x28/0xc0 [] ? 
trace_hardirqs_off+0xd/0x10 [] alloc_pages_current+0xb8/0x180 [] __get_free_pages+0x2a/0x80 [] kmalloc_order_trace+0x3e/0x1a0 [] __kmalloc+0x2f5/0x3a0 [] ? local_bh_enable_ip+0x7c/0xf0 [] rds_message_alloc+0x23/0xb0 [rds] [] rds_sendmsg+0x2b1/0x990 [rds] [] ? trace_hardirqs_off+0xd/0x10 [] sock_sendmsg+0xb0/0xe0 [] ? get_lock_stats+0x22/0x70 [] ? put_lock_stats.isra.23+0xe/0x40 [] sys_sendto+0x130/0x180 [] ? trace_hardirqs_on+0xd/0x10 [] ? _raw_spin_unlock_irq+0x3b/0x60 [] ? sysret_check+0x1b/0x56 [] ? trace_hardirqs_on_caller+0x115/0x1a0 [] ? trace_hardirqs_on_thunk+0x3a/0x3f [] system_call_fastpath+0x16/0x1b ---[ end trace eed6ae990d018c8b ]--- Reported-by: Dave Jones Cc: Dave Jones Cc: David S. Miller Cc: Venkat Venkatsubra Signed-off-by: Cong Wang Acked-by: Venkat Venkatsubra Signed-off-by: David S. Miller --- net/rds/message.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/rds/message.c b/net/rds/message.c index f0a4658f3273..aff589cc022e 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -197,6 +197,9 @@ struct rds_message *rds_message_alloc(unsigned int extra_len, gfp_t gfp) { struct rds_message *rm; + if (extra_len > KMALLOC_MAX_SIZE - sizeof(struct rds_message)) + return NULL; + rm = kzalloc(sizeof(struct rds_message) + extra_len, gfp); if (!rm) goto out; From 3f736868b47687d1336fe88185560b22bb92021e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 3 Mar 2013 16:28:27 +0000 Subject: [PATCH 146/261] sctp: use KMALLOC_MAX_SIZE instead of its own MAX_KMALLOC_SIZE Don't definite its own MAX_KMALLOC_SIZE, use the one defined in mm. Cc: Vlad Yasevich Cc: Sridhar Samudrala Cc: Neil Horman Cc: David S. Miller Signed-off-by: Cong Wang Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/ssnmap.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/sctp/ssnmap.c b/net/sctp/ssnmap.c index 442ad4ed6315..825ea94415b3 100644 --- a/net/sctp/ssnmap.c +++ b/net/sctp/ssnmap.c @@ -41,8 +41,6 @@ #include #include -#define MAX_KMALLOC_SIZE 131072 - static struct sctp_ssnmap *sctp_ssnmap_init(struct sctp_ssnmap *map, __u16 in, __u16 out); @@ -65,7 +63,7 @@ struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, int size; size = sctp_ssnmap_size(in, out); - if (size <= MAX_KMALLOC_SIZE) + if (size <= KMALLOC_MAX_SIZE) retval = kmalloc(size, gfp); else retval = (struct sctp_ssnmap *) @@ -82,7 +80,7 @@ struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, return retval; fail_map: - if (size <= MAX_KMALLOC_SIZE) + if (size <= KMALLOC_MAX_SIZE) kfree(retval); else free_pages((unsigned long)retval, get_order(size)); @@ -124,7 +122,7 @@ void sctp_ssnmap_free(struct sctp_ssnmap *map) int size; size = sctp_ssnmap_size(map->in.len, map->out.len); - if (size <= MAX_KMALLOC_SIZE) + if (size <= KMALLOC_MAX_SIZE) kfree(map); else free_pages((unsigned long)map, get_order(size)); From de5fb0a053482d89262c3284b67884cd2c621adc Mon Sep 17 00:00:00 2001 From: Frank Li Date: Sun, 3 Mar 2013 17:34:25 +0000 Subject: [PATCH 147/261] net: fec: put tx to napi poll function to fix dead lock up stack ndo_start_xmit already hold lock. fec_enet_start_xmit needn't spin lock. stat_xmit just update fep->cur_tx fec_enet_tx just update fep->dirty_tx Reserve a empty bdb to check full or empty cur_tx == dirty_tx means full cur_tx == dirty_tx +1 means empty So needn't is_full variable. 
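In index form the reserved-slot convention reads as follows; this is only an illustrative sketch (the driver itself walks struct bufdesc pointers via fec_enet_get_nextdesc() rather than plain indices, and the helper names below are made up):

/*
 * cur_tx   - next descriptor the xmit path will fill
 * dirty_tx - last descriptor the reclaim path has already freed
 * One slot is deliberately left unused so the two states stay distinct.
 */
static bool tx_ring_full(unsigned int cur_tx, unsigned int dirty_tx)
{
	return cur_tx == dirty_tx;			/* producer has caught the reclaimer */
}

static bool tx_ring_empty(unsigned int cur_tx, unsigned int dirty_tx,
			  unsigned int ring_size)
{
	return (dirty_tx + 1) % ring_size == cur_tx;	/* nothing left to reclaim */
}

With this convention the transmit path stops the queue when the full condition holds, and the reclaim path wakes it again as soon as dirty_tx moves away from cur_tx, so no separate tx_full flag is needed.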
Fix spin lock deadlock ================================= [ INFO: inconsistent lock state ] 3.8.0-rc5+ #107 Not tainted --------------------------------- inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage. ptp4l/615 [HC1[1]:SC0[0]:HE0:SE1] takes: (&(&list->lock)->rlock#3){?.-...}, at: [<8042c3c4>] skb_queue_tail+0x20/0x50 {HARDIRQ-ON-W} state was registered at: [<80067250>] mark_lock+0x154/0x4e8 [<800676f4>] mark_irqflags+0x110/0x1a4 [<80069208>] __lock_acquire+0x494/0x9c0 [<80069ce8>] lock_acquire+0x90/0xa4 [<80527ad0>] _raw_spin_lock_bh+0x44/0x54 [<804877e0>] first_packet_length+0x38/0x1f0 [<804879e4>] udp_poll+0x4c/0x5c [<804231f8>] sock_poll+0x24/0x28 [<800d27f0>] do_poll.isra.10+0x120/0x254 [<800d36e4>] do_sys_poll+0x15c/0x1e8 [<800d3828>] sys_poll+0x60/0xc8 [<8000e780>] ret_fast_syscall+0x0/0x3c *** DEADLOCK *** 1 lock held by ptp4l/615: #0: (&(&fep->hw_lock)->rlock){-.-...}, at: [<80355f9c>] fec_enet_tx+0x24/0x268 stack backtrace: Backtrace: [<800121e0>] (dump_backtrace+0x0/0x10c) from [<80516210>] (dump_stack+0x18/0x1c) r6:8063b1fc r5:bf38b2f8 r4:bf38b000 r3:bf38b000 [<805161f8>] (dump_stack+0x0/0x1c) from [<805189d0>] (print_usage_bug.part.34+0x164/0x1a4) [<8051886c>] (print_usage_bug.part.34+0x0/0x1a4) from [<80518a88>] (print_usage_bug+0x78/0x88) r8:80065664 r7:bf38b2f8 r6:00000002 r5:00000000 r4:bf38b000 [<80518a10>] (print_usage_bug+0x0/0x88) from [<80518b58>] (mark_lock_irq+0xc0/0x270) r7:bf38b000 r6:00000002 r5:bf38b2f8 r4:00000000 [<80518a98>] (mark_lock_irq+0x0/0x270) from [<80067270>] (mark_lock+0x174/0x4e8) [<800670fc>] (mark_lock+0x0/0x4e8) from [<80067744>] (mark_irqflags+0x160/0x1a4) [<800675e4>] (mark_irqflags+0x0/0x1a4) from [<80069208>] (__lock_acquire+0x494/0x9c0) r5:00000002 r4:bf38b2f8 [<80068d74>] (__lock_acquire+0x0/0x9c0) from [<80069ce8>] (lock_acquire+0x90/0xa4) [<80069c58>] (lock_acquire+0x0/0xa4) from [<805278d8>] (_raw_spin_lock_irqsave+0x4c/0x60) [<8052788c>] (_raw_spin_lock_irqsave+0x0/0x60) from [<8042c3c4>] (skb_queue_tail+0x20/0x50) r6:bfbb2180 r5:bf1d0190 r4:bf1d0184 [<8042c3a4>] (skb_queue_tail+0x0/0x50) from [<8042c4cc>] (sock_queue_err_skb+0xd8/0x188) r6:00000056 r5:bfbb2180 r4:bf1d0000 r3:00000000 [<8042c3f4>] (sock_queue_err_skb+0x0/0x188) from [<8042d15c>] (skb_tstamp_tx+0x70/0xa0) r6:bf0dddb0 r5:bf1d0000 r4:bfbb2180 r3:00000004 [<8042d0ec>] (skb_tstamp_tx+0x0/0xa0) from [<803561d0>] (fec_enet_tx+0x258/0x268) r6:c089d260 r5:00001c00 r4:bfbd0000 [<80355f78>] (fec_enet_tx+0x0/0x268) from [<803562cc>] (fec_enet_interrupt+0xec/0xf8) [<803561e0>] (fec_enet_interrupt+0x0/0xf8) from [<8007d5b0>] (handle_irq_event_percpu+0x54/0x1a0) [<8007d55c>] (handle_irq_event_percpu+0x0/0x1a0) from [<8007d740>] (handle_irq_event+0x44/0x64) [<8007d6fc>] (handle_irq_event+0x0/0x64) from [<80080690>] (handle_fasteoi_irq+0xc4/0x15c) r6:bf0dc000 r5:bf811290 r4:bf811240 r3:00000000 [<800805cc>] (handle_fasteoi_irq+0x0/0x15c) from [<8007ceec>] (generic_handle_irq+0x28/0x38) r5:807130c8 r4:00000096 [<8007cec4>] (generic_handle_irq+0x0/0x38) from [<8000f16c>] (handle_IRQ+0x54/0xb4) r4:8071d280 r3:00000180 [<8000f118>] (handle_IRQ+0x0/0xb4) from [<80008544>] (gic_handle_irq+0x30/0x64) r8:8000e924 r7:f4000100 r6:bf0ddef8 r5:8071c974 r4:f400010c r3:00000000 [<80008514>] (gic_handle_irq+0x0/0x64) from [<8000e2e4>] (__irq_svc+0x44/0x5c) Exception stack(0xbf0ddef8 to 0xbf0ddf40) Signed-off-by: Frank Li Signed-off-by: David S. 
Miller --- drivers/net/ethernet/freescale/fec.c | 87 ++++++++++++++-------------- drivers/net/ethernet/freescale/fec.h | 3 - 2 files changed, 42 insertions(+), 48 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec.c b/drivers/net/ethernet/freescale/fec.c index fccc3bf2141d..069a155d16ed 100644 --- a/drivers/net/ethernet/freescale/fec.c +++ b/drivers/net/ethernet/freescale/fec.c @@ -246,14 +246,13 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) struct bufdesc *bdp; void *bufaddr; unsigned short status; - unsigned long flags; + unsigned int index; if (!fep->link) { /* Link is down or autonegotiation is in progress. */ return NETDEV_TX_BUSY; } - spin_lock_irqsave(&fep->hw_lock, flags); /* Fill in a Tx ring entry */ bdp = fep->cur_tx; @@ -264,7 +263,6 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) * This should not happen, since ndev->tbusy should be set. */ printk("%s: tx queue full!.\n", ndev->name); - spin_unlock_irqrestore(&fep->hw_lock, flags); return NETDEV_TX_BUSY; } @@ -280,13 +278,13 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) * 4-byte boundaries. Use bounce buffers to copy data * and get it aligned. Ugh. */ + if (fep->bufdesc_ex) + index = (struct bufdesc_ex *)bdp - + (struct bufdesc_ex *)fep->tx_bd_base; + else + index = bdp - fep->tx_bd_base; + if (((unsigned long) bufaddr) & FEC_ALIGNMENT) { - unsigned int index; - if (fep->bufdesc_ex) - index = (struct bufdesc_ex *)bdp - - (struct bufdesc_ex *)fep->tx_bd_base; - else - index = bdp - fep->tx_bd_base; memcpy(fep->tx_bounce[index], skb->data, skb->len); bufaddr = fep->tx_bounce[index]; } @@ -300,10 +298,7 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) swap_buffer(bufaddr, skb->len); /* Save skb pointer */ - fep->tx_skbuff[fep->skb_cur] = skb; - - ndev->stats.tx_bytes += skb->len; - fep->skb_cur = (fep->skb_cur+1) & TX_RING_MOD_MASK; + fep->tx_skbuff[index] = skb; /* Push the data cache so the CPM does not get stale memory * data. @@ -331,25 +326,21 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) ebdp->cbd_esc = BD_ENET_TX_INT; } } - /* Trigger transmission start */ - writel(0, fep->hwp + FEC_X_DES_ACTIVE); - /* If this was the last BD in the ring, start at the beginning again. */ if (status & BD_ENET_TX_WRAP) bdp = fep->tx_bd_base; else bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex); - if (bdp == fep->dirty_tx) { - fep->tx_full = 1; - netif_stop_queue(ndev); - } - fep->cur_tx = bdp; - skb_tx_timestamp(skb); + if (fep->cur_tx == fep->dirty_tx) + netif_stop_queue(ndev); - spin_unlock_irqrestore(&fep->hw_lock, flags); + /* Trigger transmission start */ + writel(0, fep->hwp + FEC_X_DES_ACTIVE); + + skb_tx_timestamp(skb); return NETDEV_TX_OK; } @@ -406,11 +397,8 @@ fec_restart(struct net_device *ndev, int duplex) writel((unsigned long)fep->bd_dma + sizeof(struct bufdesc) * RX_RING_SIZE, fep->hwp + FEC_X_DES_START); - fep->dirty_tx = fep->cur_tx = fep->tx_bd_base; fep->cur_rx = fep->rx_bd_base; - /* Reset SKB transmit buffers. 
*/ - fep->skb_cur = fep->skb_dirty = 0; for (i = 0; i <= TX_RING_MOD_MASK; i++) { if (fep->tx_skbuff[i]) { dev_kfree_skb_any(fep->tx_skbuff[i]); @@ -573,20 +561,35 @@ fec_enet_tx(struct net_device *ndev) struct bufdesc *bdp; unsigned short status; struct sk_buff *skb; + int index = 0; fep = netdev_priv(ndev); - spin_lock(&fep->hw_lock); bdp = fep->dirty_tx; + /* get next bdp of dirty_tx */ + if (bdp->cbd_sc & BD_ENET_TX_WRAP) + bdp = fep->tx_bd_base; + else + bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex); + while (((status = bdp->cbd_sc) & BD_ENET_TX_READY) == 0) { - if (bdp == fep->cur_tx && fep->tx_full == 0) + + /* current queue is empty */ + if (bdp == fep->cur_tx) break; + if (fep->bufdesc_ex) + index = (struct bufdesc_ex *)bdp - + (struct bufdesc_ex *)fep->tx_bd_base; + else + index = bdp - fep->tx_bd_base; + dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, FEC_ENET_TX_FRSIZE, DMA_TO_DEVICE); bdp->cbd_bufaddr = 0; - skb = fep->tx_skbuff[fep->skb_dirty]; + skb = fep->tx_skbuff[index]; + /* Check for errors. */ if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC | BD_ENET_TX_RL | BD_ENET_TX_UN | @@ -631,8 +634,9 @@ fec_enet_tx(struct net_device *ndev) /* Free the sk buffer associated with this last transmit */ dev_kfree_skb_any(skb); - fep->tx_skbuff[fep->skb_dirty] = NULL; - fep->skb_dirty = (fep->skb_dirty + 1) & TX_RING_MOD_MASK; + fep->tx_skbuff[index] = NULL; + + fep->dirty_tx = bdp; /* Update pointer to next buffer descriptor to be transmitted */ if (status & BD_ENET_TX_WRAP) @@ -642,14 +646,12 @@ fec_enet_tx(struct net_device *ndev) /* Since we have freed up a buffer, the ring is no longer full */ - if (fep->tx_full) { - fep->tx_full = 0; + if (fep->dirty_tx != fep->cur_tx) { if (netif_queue_stopped(ndev)) netif_wake_queue(ndev); } } - fep->dirty_tx = bdp; - spin_unlock(&fep->hw_lock); + return; } @@ -816,7 +818,7 @@ fec_enet_interrupt(int irq, void *dev_id) int_events = readl(fep->hwp + FEC_IEVENT); writel(int_events, fep->hwp + FEC_IEVENT); - if (int_events & FEC_ENET_RXF) { + if (int_events & (FEC_ENET_RXF | FEC_ENET_TXF)) { ret = IRQ_HANDLED; /* Disable the RX interrupt */ @@ -827,15 +829,6 @@ fec_enet_interrupt(int irq, void *dev_id) } } - /* Transmit OK, or non-fatal error. Update the buffer - * descriptors. FEC handles all errors, we just discover - * them as part of the transmit process. - */ - if (int_events & FEC_ENET_TXF) { - ret = IRQ_HANDLED; - fec_enet_tx(ndev); - } - if (int_events & FEC_ENET_MII) { ret = IRQ_HANDLED; complete(&fep->mdio_done); @@ -851,6 +844,8 @@ static int fec_enet_rx_napi(struct napi_struct *napi, int budget) int pkts = fec_enet_rx(ndev, budget); struct fec_enet_private *fep = netdev_priv(ndev); + fec_enet_tx(ndev); + if (pkts < budget) { napi_complete(napi); writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK); @@ -1646,6 +1641,7 @@ static int fec_enet_init(struct net_device *ndev) /* ...and the same for transmit */ bdp = fep->tx_bd_base; + fep->cur_tx = bdp; for (i = 0; i < TX_RING_SIZE; i++) { /* Initialize the BD for every fragment in the page. 
*/ @@ -1657,6 +1653,7 @@ static int fec_enet_init(struct net_device *ndev) /* Set the last buffer to wrap */ bdp = fec_enet_get_prevdesc(bdp, fep->bufdesc_ex); bdp->cbd_sc |= BD_SC_WRAP; + fep->dirty_tx = bdp; fec_restart(ndev, 0); diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 01579b8e37c4..c0f63be91ff7 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -214,8 +214,6 @@ struct fec_enet_private { unsigned char *tx_bounce[TX_RING_SIZE]; struct sk_buff *tx_skbuff[TX_RING_SIZE]; struct sk_buff *rx_skbuff[RX_RING_SIZE]; - ushort skb_cur; - ushort skb_dirty; /* CPM dual port RAM relative addresses */ dma_addr_t bd_dma; @@ -227,7 +225,6 @@ struct fec_enet_private { /* The ring entries to be free()ed */ struct bufdesc *dirty_tx; - uint tx_full; /* hold while accessing the HW like ringbuffer for tx/rx but not MAC */ spinlock_t hw_lock; From 3e8b0ac3e41e3c882222a5522d5df7212438ab51 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Sun, 3 Mar 2013 20:46:46 +0000 Subject: [PATCH 148/261] net: ipv6: Don't purge default router if accept_ra=2 Setting net.ipv6.conf..accept_ra=2 causes the kernel to accept RAs even when forwarding is enabled. However, enabling forwarding purges all default routes on the system, breaking connectivity until the next RA is received. Fix this by not purging default routes on interfaces that have accept_ra=2. Signed-off-by: Lorenzo Colitti Acked-by: YOSHIFUJI Hideaki Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 928266569689..e5fe0041adfa 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1915,7 +1915,8 @@ void rt6_purge_dflt_routers(struct net *net) restart: read_lock_bh(&table->tb6_lock); for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { - if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { + if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && + (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) { dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); ip6_del_rt(rt); From acac8406cd523a3afbd6c6db31e9f763644bf6ba Mon Sep 17 00:00:00 2001 From: Frank Li Date: Sun, 3 Mar 2013 20:52:38 +0000 Subject: [PATCH 149/261] net: fec: fix build error in no MXC platform MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit build error cause by Commit ff43da86c69d76a726ffe7d1666148960dc1d108 ("NET: FEC: dynamtic check DMA desc buff type") drivers/net/ethernet/freescale/fec.c: In function ‘fec_enet_get_nextdesc’: drivers/net/ethernet/freescale/fec.c:215:18: error: invalid use of undefined type ‘struct bufdesc_ex’ drivers/net/ethernet/freescale/fec.c: In function ‘fec_enet_get_prevdesc’: drivers/net/ethernet/freescale/fec.c:224:18: error: invalid use of undefined type ‘struct bufdesc_ex’ drivers/net/ethernet/freescale/fec.c: In function ‘fec_enet_start_xmit’: drivers/net/ethernet/freescale/fec.c:286:37: error: arithmetic on pointer to an incomplete type drivers/net/ethernet/freescale/fec.c:287:13: error: arithmetic on pointer to an incomplete type drivers/net/ethernet/freescale/fec.c:324:7: error: dereferencing pointer to incomplete type etc.... Signed-off-by: Frank Li Signed-off-by: David S. 
Miller --- drivers/net/ethernet/freescale/fec.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index c0f63be91ff7..f5390071efd0 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -97,6 +97,13 @@ struct bufdesc { unsigned short cbd_sc; /* Control and status info */ unsigned long cbd_bufaddr; /* Buffer address */ }; +#else +struct bufdesc { + unsigned short cbd_sc; /* Control and status info */ + unsigned short cbd_datlen; /* Data length */ + unsigned long cbd_bufaddr; /* Buffer address */ +}; +#endif struct bufdesc_ex { struct bufdesc desc; @@ -107,14 +114,6 @@ struct bufdesc_ex { unsigned short res0[4]; }; -#else -struct bufdesc { - unsigned short cbd_sc; /* Control and status info */ - unsigned short cbd_datlen; /* Data length */ - unsigned long cbd_bufaddr; /* Buffer address */ -}; -#endif - /* * The following definitions courtesy of commproc.h, which where * Copyright (c) 1997 Dan Malek (dmalek@jlc.net). From 7dac1b514a00817ddb43704068c14ffd8b8fba19 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 3 Mar 2013 20:57:18 +0000 Subject: [PATCH 150/261] rds: simplify a warning message Cc: David S. Miller Cc: Venkat Venkatsubra Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/rds/message.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/rds/message.c b/net/rds/message.c index aff589cc022e..aba232f9f308 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -82,10 +82,7 @@ static void rds_message_purge(struct rds_message *rm) void rds_message_put(struct rds_message *rm) { rdsdebug("put rm %p ref %d\n", rm, atomic_read(&rm->m_refcount)); - if (atomic_read(&rm->m_refcount) == 0) { -printk(KERN_CRIT "danger refcount zero on %p\n", rm); -WARN_ON(1); - } + WARN(!atomic_read(&rm->m_refcount), "danger refcount zero on %p\n", rm); if (atomic_dec_and_test(&rm->m_refcount)) { BUG_ON(!list_empty(&rm->m_sock_item)); BUG_ON(!list_empty(&rm->m_conn_item)); From d2123be0e55101742dacd6dae60cdd0f1e2ef302 Mon Sep 17 00:00:00 2001 From: Silviu-Mihai Popescu Date: Sun, 3 Mar 2013 21:09:31 +0000 Subject: [PATCH 151/261] CAIF: fix sparse warning for caif_usb This fixes the following sparse warning: net/caif/caif_usb.c:84:16: warning: symbol 'cfusbl_create' was not declared. Should it be static? Signed-off-by: Silviu-Mihai Popescu Signed-off-by: David S. Miller --- net/caif/caif_usb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c index 3ebc8cbc91ff..ef8ebaa993cf 100644 --- a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -81,8 +81,8 @@ static void cfusbl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, layr->up->ctrlcmd(layr->up, ctrl, layr->id); } -struct cflayer *cfusbl_create(int phyid, u8 ethaddr[ETH_ALEN], - u8 braddr[ETH_ALEN]) +static struct cflayer *cfusbl_create(int phyid, u8 ethaddr[ETH_ALEN], + u8 braddr[ETH_ALEN]) { struct cfusbl *this = kmalloc(sizeof(struct cfusbl), GFP_ATOMIC); From aab2b4bf224ef8358d262f95b568b8ad0cecf0a0 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 4 Mar 2013 06:23:05 +0000 Subject: [PATCH 152/261] tcp: fix double-counted receiver RTT when leaving receiver fast path We should not update ts_recent and call tcp_rcv_rtt_measure_ts() both before and after going to step5. That wastes CPU and double-counts the receiver-side RTT sample. 
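The shape of the fix, in plain terms: hoist the check that may divert to the slow path above the per-packet state updates, so that state is touched exactly once per segment no matter which path handles it. A minimal userspace sketch of that ordering rule, using made-up names rather than the real TCP fast-path code:

#include <stdbool.h>
#include <stdio.h>

struct conn {
    long ts_recent;     /* per-packet state that must be updated once */
    long rtt_samples;
};

static bool needs_slow_path(int truesize, int forward_alloc)
{
    return truesize > forward_alloc;
}

static void fast_path_rcv(struct conn *c, long pkt_ts,
                          int truesize, int forward_alloc)
{
    /* Test the bail-out condition before any stateful update, so the
     * slow path (which does its own bookkeeping) never sees the same
     * work done twice. */
    if (needs_slow_path(truesize, forward_alloc))
        return;                 /* stands in for "goto step5" */

    c->ts_recent = pkt_ts;      /* updated exactly once */
    c->rtt_samples++;           /* one receiver RTT sample per packet */
}

int main(void)
{
    struct conn c = { 0, 0 };

    fast_path_rcv(&c, 100, 512, 4096);
    printf("rtt samples: %ld\n", c.rtt_samples);
    return 0;
}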
Signed-off-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a759e19496d2..0d9bdacce99f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5485,6 +5485,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, if (tcp_checksum_complete_user(sk, skb)) goto csum_error; + if ((int)skb->truesize > sk->sk_forward_alloc) + goto step5; + /* Predicted packet is in window by definition. * seq == rcv_nxt and rcv_wup <= rcv_nxt. * Hence, check seq<=rcv_wup reduces to: @@ -5496,9 +5499,6 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_rcv_rtt_measure_ts(sk, skb); - if ((int)skb->truesize > sk->sk_forward_alloc) - goto step5; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS); /* Bulk data transfer: receiver */ From 9bf7a4890518186238d2579be16ecc5190a707c0 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 1 Mar 2013 13:35:47 -0500 Subject: [PATCH 153/261] Btrfs: use set_nlink if our i_nlink is 0 We need to inc the nlink of deleted entries when running replay so we can do the unlink on the fs_root and get everything cleaned up and then have the orphan cleanup do the right thing. The problem is inc_nlink complains about this, even thought it still does the right thing. So use set_nlink() if our i_nlink is 0 to keep users from seeing the warnings during log replay. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/tree-log.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c7ef569eb22a..451fad96ecd1 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1382,7 +1382,10 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, btrfs_release_path(path); if (ret == 0) { - btrfs_inc_nlink(inode); + if (!inode->i_nlink) + set_nlink(inode, 1); + else + btrfs_inc_nlink(inode); ret = btrfs_update_inode(trans, root, inode); } else if (ret == -EEXIST) { ret = 0; From aec8030a8745221c8658f2033b22c98528897b13 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Mon, 4 Mar 2013 09:44:29 +0000 Subject: [PATCH 154/261] Btrfs: fix wrong handle at error path of create_snapshot() when the commit fails There are several bugs at error path of create_snapshot() when the transaction commitment failed. - access the freed transaction handler. At the end of the transaction commitment, the transaction handler was freed, so we should not access it after the transaction commitment. - we were not aware of the error which happened during the snapshot creation if we submitted a async transaction commitment. - pending snapshot access vs pending snapshot free. when something wrong happened after we submitted a async transaction commitment, the transaction committer would cleanup the pending snapshots and free them. But the snapshot creators were not aware of it, they would access the freed pending snapshots. This patch fixes the above problems by: - remove the dangerous code that accessed the freed handler - assign ->error if the error happens during the snapshot creation - the transaction committer doesn't free the pending snapshots, just assigns the error number and evicts them before we unblock the transaction. 
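The ownership rule the patch establishes can be stated simply: the abort path only marks a queued snapshot as failed and unlinks it, while the task that queued it remains responsible for reading the error and freeing the object. A small stand-alone sketch of that pattern (generic C with invented names, not the btrfs structures or API):

#include <stdio.h>
#include <stdlib.h>

struct pending {
    int error;             /* written by the abort path              */
    struct pending *next;  /* freed only by the task that queued it  */
};

/* Abort path: mark every queued item as cancelled and unlink it, but
 * do NOT free it - the submitter still holds a pointer and will read
 * ->error once the (failed) commit returns. */
static void evict_pending(struct pending **head)
{
    struct pending *p = *head;

    while (p) {
        struct pending *next = p->next;

        p->error = -1;      /* stands in for -ECANCELED */
        p->next = NULL;
        p = next;
    }
    *head = NULL;
}

int main(void)
{
    struct pending *snap = calloc(1, sizeof(*snap));
    struct pending *head;

    if (!snap)
        return 1;
    head = snap;

    evict_pending(&head);                            /* committer side */
    printf("creator sees error %d\n", snap->error);  /* creator side   */
    free(snap);                    /* creator frees its own object     */
    return 0;
}

Keeping allocation and freeing on the same side of the interface is what removes the use-after-free window the commit message describes.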
Reported-by: Dan Carpenter Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 16 +++++------- fs/btrfs/ioctl.c | 6 +---- fs/btrfs/transaction.c | 58 ++++++++++++++++++++++++------------------ 3 files changed, 41 insertions(+), 39 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 02369a3c162e..7d84651e850b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -62,7 +62,7 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, static void btrfs_destroy_ordered_extents(struct btrfs_root *root); static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, struct btrfs_root *root); -static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t); +static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t); static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root); static int btrfs_destroy_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages, @@ -3687,7 +3687,7 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, return ret; } -static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) +static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t) { struct btrfs_pending_snapshot *snapshot; struct list_head splice; @@ -3700,10 +3700,8 @@ static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) snapshot = list_entry(splice.next, struct btrfs_pending_snapshot, list); - + snapshot->error = -ECANCELED; list_del_init(&snapshot->list); - - kfree(snapshot); } } @@ -3840,6 +3838,8 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, cur_trans->blocked = 1; wake_up(&root->fs_info->transaction_blocked_wait); + btrfs_evict_pending_snapshots(cur_trans); + cur_trans->blocked = 0; wake_up(&root->fs_info->transaction_wait); @@ -3849,8 +3849,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, btrfs_destroy_delayed_inodes(root); btrfs_assert_delayed_root_empty(root); - btrfs_destroy_pending_snapshots(cur_trans); - btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages, EXTENT_DIRTY); btrfs_destroy_pinned_extent(root, @@ -3894,6 +3892,8 @@ int btrfs_cleanup_transaction(struct btrfs_root *root) if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) wake_up(&root->fs_info->transaction_blocked_wait); + btrfs_evict_pending_snapshots(t); + t->blocked = 0; smp_mb(); if (waitqueue_active(&root->fs_info->transaction_wait)) @@ -3907,8 +3907,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root) btrfs_destroy_delayed_inodes(root); btrfs_assert_delayed_root_empty(root); - btrfs_destroy_pending_snapshots(t); - btrfs_destroy_delalloc_inodes(root); spin_lock(&root->fs_info->trans_lock); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index b908960c9746..94c0e42dfa1e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -596,12 +596,8 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, ret = btrfs_commit_transaction(trans, root->fs_info->extent_root); } - if (ret) { - /* cleanup_transaction has freed this for us */ - if (trans->aborted) - pending_snapshot = NULL; + if (ret) goto fail; - } ret = pending_snapshot->error; if (ret) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f11c2e0a3746..d8fce6fe9cf8 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1053,7 +1053,12 @@ int btrfs_defrag_root(struct btrfs_root *root) /* * new snapshots need to be created at a very specific time in the - * transaction commit. 
This does the actual creation + * transaction commit. This does the actual creation. + * + * Note: + * If the error which may affect the commitment of the current transaction + * happens, we should return the error number. If the error which just affect + * the creation of the pending snapshots, just return 0. */ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, @@ -1072,7 +1077,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct extent_buffer *tmp; struct extent_buffer *old; struct timespec cur_time = CURRENT_TIME; - int ret; + int ret = 0; u64 to_reserve = 0; u64 index = 0; u64 objectid; @@ -1081,40 +1086,36 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) { - ret = pending->error = -ENOMEM; - return ret; + pending->error = -ENOMEM; + return 0; } new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); if (!new_root_item) { - ret = pending->error = -ENOMEM; + pending->error = -ENOMEM; goto root_item_alloc_fail; } - ret = btrfs_find_free_objectid(tree_root, &objectid); - if (ret) { - pending->error = ret; + pending->error = btrfs_find_free_objectid(tree_root, &objectid); + if (pending->error) goto no_free_objectid; - } btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); if (to_reserve > 0) { - ret = btrfs_block_rsv_add(root, &pending->block_rsv, - to_reserve, - BTRFS_RESERVE_NO_FLUSH); - if (ret) { - pending->error = ret; + pending->error = btrfs_block_rsv_add(root, + &pending->block_rsv, + to_reserve, + BTRFS_RESERVE_NO_FLUSH); + if (pending->error) goto no_free_objectid; - } } - ret = btrfs_qgroup_inherit(trans, fs_info, root->root_key.objectid, - objectid, pending->inherit); - if (ret) { - pending->error = ret; + pending->error = btrfs_qgroup_inherit(trans, fs_info, + root->root_key.objectid, + objectid, pending->inherit); + if (pending->error) goto no_free_objectid; - } key.objectid = objectid; key.offset = (u64)-1; @@ -1142,7 +1143,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, dentry->d_name.len, 0); if (dir_item != NULL && !IS_ERR(dir_item)) { pending->error = -EEXIST; - goto fail; + goto dir_item_existed; } else if (IS_ERR(dir_item)) { ret = PTR_ERR(dir_item); btrfs_abort_transaction(trans, root, ret); @@ -1273,6 +1274,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, if (ret) btrfs_abort_transaction(trans, root, ret); fail: + pending->error = ret; +dir_item_existed: trans->block_rsv = rsv; trans->bytes_reserved = 0; no_free_objectid: @@ -1288,12 +1291,17 @@ root_item_alloc_fail: static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info) { - struct btrfs_pending_snapshot *pending; + struct btrfs_pending_snapshot *pending, *next; struct list_head *head = &trans->transaction->pending_snapshots; + int ret = 0; - list_for_each_entry(pending, head, list) - create_pending_snapshot(trans, fs_info, pending); - return 0; + list_for_each_entry_safe(pending, next, head, list) { + list_del(&pending->list); + ret = create_pending_snapshot(trans, fs_info, pending); + if (ret) + break; + } + return ret; } static void update_super_roots(struct btrfs_root *root) From 00d71c9c17b1fd28fa54f323a29a0e23c6d3de40 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Mon, 4 Mar 2013 09:45:06 +0000 Subject: [PATCH 155/261] Btrfs: fix unclosed transaction handler when the async transaction commitment fails If the async 
transaction commitment failed, we need close the current transaction handler, or the current transaction will be blocked to commit because of this orphan handler. We fix the problem by doing sync transaction commitment, that is to invoke btrfs_commit_transaction(). Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/ioctl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 94c0e42dfa1e..3fdfabcc1aaa 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -527,6 +527,8 @@ fail: if (async_transid) { *async_transid = trans->transid; err = btrfs_commit_transaction_async(trans, root, 1); + if (err) + err = btrfs_commit_transaction(trans, root); } else { err = btrfs_commit_transaction(trans, root); } @@ -592,6 +594,8 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, *async_transid = trans->transid; ret = btrfs_commit_transaction_async(trans, root->fs_info->extent_root, 1); + if (ret) + ret = btrfs_commit_transaction(trans, root); } else { ret = btrfs_commit_transaction(trans, root->fs_info->extent_root); From 8f71f3e0e4fb5a2445fb93d3057a33aefc4aa30d Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 4 Mar 2013 16:25:36 +0000 Subject: [PATCH 156/261] Btrfs: check for NULL pointer in updating reloc roots Add a check for NULL pointer to avoid invalid reference. Signed-off-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/relocation.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index ba5a3210da9a..16e0c6fbdbed 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1269,6 +1269,8 @@ static int __update_reloc_root(struct btrfs_root *root, int del) } spin_unlock(&rc->reloc_root_tree.lock); + if (!node) + return 0; BUG_ON((struct btrfs_root *)node->data != root); if (!del) { From aca1bba6f9bc550a33312b28e98b3de5ddb3bb15 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 4 Mar 2013 16:25:37 +0000 Subject: [PATCH 157/261] Btrfs: build up error handling for merge_reloc_roots We first use btrfs_std_error hook to replace with BUG_ON, and we also need to cleanup what is left, including reloc roots rbtree and reloc roots list. Here we use a helper function to cleanup both rbtree and list, and since this function can also be used in the balance recover path, we also make the change as well to keep code simple. 
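The structural point of the merge_reloc_roots change is that one teardown helper owns whatever is left on the list, and any path that fails simply parks its item back on that list for the helper to reclaim. A rough, generic sketch of that shape (illustrative names only, not the btrfs relocation code):

#include <stdlib.h>

struct item {
    struct item *next;
};

static void free_all(struct item **list)
{
    while (*list) {
        struct item *i = *list;

        *list = i->next;
        free(i);
    }
}

static int process(struct item *i) { return i ? 0 : -1; }

static int merge_all(struct item **todo)
{
    int ret = 0;

    while (*todo) {
        struct item *i = *todo;

        *todo = i->next;
        ret = process(i);
        if (ret) {
            /* On failure, park the item back on the list so the
             * shared teardown below reclaims it instead of leaking. */
            i->next = *todo;
            *todo = i;
            break;
        }
        free(i);
    }

    free_all(todo);   /* one cleanup helper for every exit path */
    return ret;
}

int main(void)
{
    struct item *list = calloc(1, sizeof(*list));

    return list ? merge_all(&list) : 1;
}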
Signed-off-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/relocation.c | 47 ++++++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 16e0c6fbdbed..0f001c14eaf4 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2239,6 +2239,21 @@ again: return err; } +static noinline_for_stack +void free_reloc_roots(struct list_head *list) +{ + struct btrfs_root *reloc_root; + + while (!list_empty(list)) { + reloc_root = list_entry(list->next, struct btrfs_root, + root_list); + __update_reloc_root(reloc_root, 1); + free_extent_buffer(reloc_root->node); + free_extent_buffer(reloc_root->commit_root); + kfree(reloc_root); + } +} + static noinline_for_stack int merge_reloc_roots(struct reloc_control *rc) { @@ -2246,7 +2261,7 @@ int merge_reloc_roots(struct reloc_control *rc) struct btrfs_root *reloc_root; LIST_HEAD(reloc_roots); int found = 0; - int ret; + int ret = 0; again: root = rc->extent_root; @@ -2272,20 +2287,33 @@ again: BUG_ON(root->reloc_root != reloc_root); ret = merge_reloc_root(rc, root); - BUG_ON(ret); + if (ret) + goto out; } else { list_del_init(&reloc_root->root_list); } ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1); - BUG_ON(ret < 0); + if (ret < 0) { + if (list_empty(&reloc_root->root_list)) + list_add_tail(&reloc_root->root_list, + &reloc_roots); + goto out; + } } if (found) { found = 0; goto again; } +out: + if (ret) { + btrfs_std_error(root->fs_info, ret); + if (!list_empty(&reloc_roots)) + free_reloc_roots(&reloc_roots); + } + BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); - return 0; + return ret; } static void free_block_list(struct rb_root *blocks) @@ -4266,14 +4294,9 @@ int btrfs_recover_relocation(struct btrfs_root *root) out_free: kfree(rc); out: - while (!list_empty(&reloc_roots)) { - reloc_root = list_entry(reloc_roots.next, - struct btrfs_root, root_list); - list_del(&reloc_root->root_list); - free_extent_buffer(reloc_root->node); - free_extent_buffer(reloc_root->commit_root); - kfree(reloc_root); - } + if (!list_empty(&reloc_roots)) + free_reloc_roots(&reloc_roots); + btrfs_free_path(path); if (err == 0) { From e1a1267054ea9dcafd01636e52d441f809efad5e Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 4 Mar 2013 16:25:38 +0000 Subject: [PATCH 158/261] Btrfs: free all recorded tree blocks on error We've missed the 'free blocks' part on ENOMEM error. Signed-off-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/relocation.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 0f001c14eaf4..7d1654cf9d48 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2848,8 +2848,10 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, int err = 0; path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + if (!path) { + err = -ENOMEM; + goto out_path; + } rb_node = rb_first(blocks); while (rb_node) { @@ -2888,10 +2890,11 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, rb_node = rb_next(rb_node); } out: - free_block_list(blocks); err = finish_pending_nodes(trans, rc, path, err); btrfs_free_path(path); +out_path: + free_block_list(blocks); return err; } From 288189471d7e12fb22c37870e151833f438deea8 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 4 Mar 2013 16:25:39 +0000 Subject: [PATCH 159/261] Btrfs: do not BUG_ON in prepare_to_reloc We can bail out from here gracefully instead of a cold BUG_ON. 
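This is the usual "undo what was set up, then return the error" pattern replacing a hard assertion; a hedged sketch of the same shape with invented names (not the real btrfs relocation functions):

#include <errno.h>
#include <stddef.h>

struct ctl { int reloc_enabled; };

static void set_reloc(struct ctl *c)   { c->reloc_enabled = 1; }
static void unset_reloc(struct ctl *c) { c->reloc_enabled = 0; }

/* Stand-in for a join/start-transaction call that may fail. */
static void *join_transaction(int fail)
{
    static int dummy;

    return fail ? NULL : &dummy;
}

static int prepare_to_relocate(struct ctl *c, int fail)
{
    void *trans;

    set_reloc(c);

    trans = join_transaction(fail);
    if (!trans) {
        /* Instead of BUG_ON(): undo the state set up above and hand
         * the error back; the caller releases whatever it reserved
         * (the block rsv, in the real code). */
        unset_reloc(c);
        return -ENOMEM;
    }

    /* ... commit the transaction ... */
    return 0;
}

int main(void)
{
    struct ctl c = { 0 };

    return prepare_to_relocate(&c, 0);
}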
Signed-off-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/relocation.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 7d1654cf9d48..9d13786eec73 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3731,7 +3731,15 @@ int prepare_to_relocate(struct reloc_control *rc) set_reloc_control(rc); trans = btrfs_join_transaction(rc->extent_root); - BUG_ON(IS_ERR(trans)); + if (IS_ERR(trans)) { + unset_reloc_control(rc); + /* + * extent tree is not a ref_cow tree and has no reloc_root to + * cleanup. And callers are responsible to free the above + * block rsv. + */ + return PTR_ERR(trans); + } btrfs_commit_transaction(trans, rc->extent_root); return 0; } From 0f788c58194e4ccc5b3ab23f872c5e18542335e4 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 4 Mar 2013 16:25:40 +0000 Subject: [PATCH 160/261] Btrfs: do not BUG_ON on aborted situation Btrfs balance can easily hit BUG_ON in these places, but we want to it bail out gracefully after we force the whole filesystem to readonly. So we use btrfs_std_error hook in place of BUG_ON. Signed-off-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/relocation.c | 6 +++++- fs/btrfs/volumes.c | 9 +++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 9d13786eec73..3ebe87977aae 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3771,7 +3771,11 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) while (1) { progress++; trans = btrfs_start_transaction(rc->extent_root, 0); - BUG_ON(IS_ERR(trans)); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + trans = NULL; + break; + } restart: if (update_backref_cache(trans, &rc->backref_cache)) { btrfs_end_transaction(trans, rc->extent_root); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 35bb2d4ed29f..9ff454df6756 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2379,7 +2379,11 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, return ret; trans = btrfs_start_transaction(root, 0); - BUG_ON(IS_ERR(trans)); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + btrfs_std_error(root->fs_info, ret); + return ret; + } lock_chunks(root); @@ -3050,7 +3054,8 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info) unset_balance_control(fs_info); ret = del_balance_item(fs_info->tree_root); - BUG_ON(ret); + if (ret) + btrfs_std_error(fs_info, ret); atomic_set(&fs_info->mutually_exclusive_operation_running, 0); } From 66b6135b7cf741f6f44ba938b27583ea3b83bd12 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 4 Mar 2013 16:25:41 +0000 Subject: [PATCH 161/261] Btrfs: avoid deadlock on transaction waiting list Only let one trans handle to wait for other handles, otherwise we will get ABBA issues. 
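For readers unfamiliar with the term: "ABBA" means two tasks acquiring (or waiting on) the same pair of resources in opposite orders, so each ends up holding what the other needs. A small pthread illustration of the hazard and the conventional cure, a single global ordering (generic userspace sketch, not the btrfs transaction code):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t A = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t B = PTHREAD_MUTEX_INITIALIZER;

/* Deadlock-prone pattern (do NOT do this):
 *   thread 1: lock(A); lock(B);
 *   thread 2: lock(B); lock(A);
 * Each thread may hold one lock while waiting forever for the other -
 * the "ABBA" situation the commit message refers to. */

static void *worker(void *arg)
{
    (void)arg;
    /* Cure: every path takes the locks in the same order (A, then B) -
     * or, as in the patch, only one side is ever allowed to wait. */
    pthread_mutex_lock(&A);
    pthread_mutex_lock(&B);
    pthread_mutex_unlock(&B);
    pthread_mutex_unlock(&A);
    return NULL;
}

int main(void)
{
    pthread_t t1, t2;

    pthread_create(&t1, NULL, worker, NULL);
    pthread_create(&t2, NULL, worker, NULL);
    pthread_join(t1, NULL);
    pthread_join(t2, NULL);
    puts("no deadlock: both threads used the same lock order");
    return 0;
}

Build with -pthread; with the consistent ordering the program always terminates, whereas the commented-out ABBA ordering can hang.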
Signed-off-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/transaction.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index d8fce6fe9cf8..fedede1fe178 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1457,6 +1457,13 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, btrfs_abort_transaction(trans, root, err); spin_lock(&root->fs_info->trans_lock); + + if (list_empty(&cur_trans->list)) { + spin_unlock(&root->fs_info->trans_lock); + btrfs_end_transaction(trans, root); + return; + } + list_del_init(&cur_trans->list); if (cur_trans == root->fs_info->running_transaction) { root->fs_info->trans_no_join = 1; From 9b53157aac7366cea413ee29b629f83225829e87 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Mon, 4 Mar 2013 17:28:38 +0000 Subject: [PATCH 162/261] Btrfs: allow running defrag in parallel to administrative tasks Commit 5ac00add added a testnset mutex and code that disallows running administrative tasks in parallel. It is prevented that the device add/delete/balance/replace/resize operations are started in parallel. By mistake, the defragmentation operation was included in the check for mutually exclusiveness as well. This is fixed with this commit. Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik --- fs/btrfs/ioctl.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 3fdfabcc1aaa..898c5729e7e5 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2245,13 +2245,6 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) if (ret) return ret; - if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, - 1)) { - pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); - mnt_drop_write_file(file); - return -EINVAL; - } - if (btrfs_root_readonly(root)) { ret = -EROFS; goto out; @@ -2306,7 +2299,6 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) ret = -EINVAL; } out: - atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); mnt_drop_write_file(file); return ret; } From f7f154f1246ccc5a0a7e9ce50932627d60a0c878 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 5 Mar 2013 10:07:08 +1030 Subject: [PATCH 163/261] hw_random: make buffer usable in scatterlist. virtio_rng feeds the randomness buffer handed by the core directly into the scatterlist, since commit bb347d98079a547e80bd4722dee1de61e4dca0e8. However, if CONFIG_HW_RANDOM=m, the static buffer isn't a linear address (at least on most archs). We could fix this in virtio_rng, but it's actually far easier to just do it in the core as virtio_rng would have to allocate a buffer every time (it doesn't know how much the core will want to read). Reported-by: Aurelien Jarno Tested-by: Aurelien Jarno Signed-off-by: Rusty Russell Cc: stable@kernel.org --- drivers/char/hw_random/core.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 1bafb40ec8a2..69ae5972713c 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -40,6 +40,7 @@ #include #include #include +#include #include @@ -52,8 +53,12 @@ static struct hwrng *current_rng; static LIST_HEAD(rng_list); static DEFINE_MUTEX(rng_mutex); static int data_avail; -static u8 rng_buffer[SMP_CACHE_BYTES < 32 ? 
32 : SMP_CACHE_BYTES] - __cacheline_aligned; +static u8 *rng_buffer; + +static size_t rng_buffer_size(void) +{ + return SMP_CACHE_BYTES < 32 ? 32 : SMP_CACHE_BYTES; +} static inline int hwrng_init(struct hwrng *rng) { @@ -116,7 +121,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, if (!data_avail) { bytes_read = rng_get_data(current_rng, rng_buffer, - sizeof(rng_buffer), + rng_buffer_size(), !(filp->f_flags & O_NONBLOCK)); if (bytes_read < 0) { err = bytes_read; @@ -307,6 +312,14 @@ int hwrng_register(struct hwrng *rng) mutex_lock(&rng_mutex); + /* kmalloc makes this safe for virt_to_page() in virtio_rng.c */ + err = -ENOMEM; + if (!rng_buffer) { + rng_buffer = kmalloc(rng_buffer_size(), GFP_KERNEL); + if (!rng_buffer) + goto out_unlock; + } + /* Must not register two RNGs with the same name. */ err = -EEXIST; list_for_each_entry(tmp, &rng_list, list) { From 0af18c5cc9403999bb189f825b816f7fc80fc0ee Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Mon, 4 Mar 2013 17:10:20 -0700 Subject: [PATCH 164/261] ASoC: tegra: fix I2S bit count mask This register field is 11 bits wide, not 15 bits wide. Given the way this value is currently, used, this patch has no practical effect. However, it's still best if the value is correct. Signed-off-by: Stephen Warren Signed-off-by: Mark Brown --- sound/soc/tegra/tegra20_i2s.h | 2 +- sound/soc/tegra/tegra30_i2s.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/tegra/tegra20_i2s.h b/sound/soc/tegra/tegra20_i2s.h index c27069d24d77..729958713cd4 100644 --- a/sound/soc/tegra/tegra20_i2s.h +++ b/sound/soc/tegra/tegra20_i2s.h @@ -121,7 +121,7 @@ #define TEGRA20_I2S_TIMING_NON_SYM_ENABLE (1 << 12) #define TEGRA20_I2S_TIMING_CHANNEL_BIT_COUNT_SHIFT 0 -#define TEGRA20_I2S_TIMING_CHANNEL_BIT_COUNT_MASK_US 0x7fff +#define TEGRA20_I2S_TIMING_CHANNEL_BIT_COUNT_MASK_US 0x7ff #define TEGRA20_I2S_TIMING_CHANNEL_BIT_COUNT_MASK (TEGRA20_I2S_TIMING_CHANNEL_BIT_COUNT_MASK_US << TEGRA20_I2S_TIMING_CHANNEL_BIT_COUNT_SHIFT) /* Fields in TEGRA20_I2S_FIFO_SCR */ diff --git a/sound/soc/tegra/tegra30_i2s.h b/sound/soc/tegra/tegra30_i2s.h index 34dc47b9581c..a294d942b9f7 100644 --- a/sound/soc/tegra/tegra30_i2s.h +++ b/sound/soc/tegra/tegra30_i2s.h @@ -110,7 +110,7 @@ #define TEGRA30_I2S_TIMING_NON_SYM_ENABLE (1 << 12) #define TEGRA30_I2S_TIMING_CHANNEL_BIT_COUNT_SHIFT 0 -#define TEGRA30_I2S_TIMING_CHANNEL_BIT_COUNT_MASK_US 0x7fff +#define TEGRA30_I2S_TIMING_CHANNEL_BIT_COUNT_MASK_US 0x7ff #define TEGRA30_I2S_TIMING_CHANNEL_BIT_COUNT_MASK (TEGRA30_I2S_TIMING_CHANNEL_BIT_COUNT_MASK_US << TEGRA30_I2S_TIMING_CHANNEL_BIT_COUNT_SHIFT) /* Fields in TEGRA30_I2S_OFFSET */ From 27777890d0fae55d14ef18791fc7994faf1bc867 Mon Sep 17 00:00:00 2001 From: Tony Breeds Date: Mon, 25 Feb 2013 16:20:05 +0000 Subject: [PATCH 165/261] powerpc: Fix compile of sha1-powerpc-asm.S on 32-bit When building with CRYPTO_SHA1_PPC enabled we fail with: powerpc/crypto/sha1-powerpc-asm.S: Assembler messages: powerpc/crypto/sha1-powerpc-asm.S:116: Error: can't resolve `0' {*ABS* section} - `STACKFRAMESIZE' {*UND* section} powerpc/crypto/sha1-powerpc-asm.S:116: Error: expression too complex powerpc/crypto/sha1-powerpc-asm.S:178: Error: unsupported relocation against STACKFRAMESIZE Use INT_FRAME_SIZE instead of STACKFRAMESIZE. 
Signed-off-by: Tony Breeds Tested-by: Christian Kujau Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/crypto/sha1-powerpc-asm.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/crypto/sha1-powerpc-asm.S b/arch/powerpc/crypto/sha1-powerpc-asm.S index a5f8264d2d3c..125e16520061 100644 --- a/arch/powerpc/crypto/sha1-powerpc-asm.S +++ b/arch/powerpc/crypto/sha1-powerpc-asm.S @@ -113,7 +113,7 @@ STEPUP4((t)+16, fn) _GLOBAL(powerpc_sha_transform) - PPC_STLU r1,-STACKFRAMESIZE(r1) + PPC_STLU r1,-INT_FRAME_SIZE(r1) SAVE_8GPRS(14, r1) SAVE_10GPRS(22, r1) @@ -175,5 +175,5 @@ _GLOBAL(powerpc_sha_transform) REST_8GPRS(14, r1) REST_10GPRS(22, r1) - addi r1,r1,STACKFRAMESIZE + addi r1,r1,INT_FRAME_SIZE blr From 6b6680c4ea3952af8ae76915cbca41245147741b Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Mon, 25 Feb 2013 16:51:49 +0000 Subject: [PATCH 166/261] powerpc/pseries/hvcserver: Fix strncpy buffer limit in location code the dest buf len is 80 (HVCS_CLC_LENGTH + 1). the src buf len is PAGE_SIZE. if src buf string len is more than 80, it will cause issue. Signed-off-by: Chen Gang Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/hvcserver.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hvcserver.c b/arch/powerpc/platforms/pseries/hvcserver.c index fcf4b4cbeaf3..4557e91626c4 100644 --- a/arch/powerpc/platforms/pseries/hvcserver.c +++ b/arch/powerpc/platforms/pseries/hvcserver.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -188,9 +189,9 @@ int hvcs_get_partner_info(uint32_t unit_address, struct list_head *head, = (unsigned int)last_p_partition_ID; /* copy the Null-term char too */ - strncpy(&next_partner_info->location_code[0], + strlcpy(&next_partner_info->location_code[0], (char *)&pi_buff[2], - strlen((char *)&pi_buff[2]) + 1); + sizeof(next_partner_info->location_code)); list_add_tail(&(next_partner_info->node), head); next_partner_info = NULL; From 9276dfd27897a0b29d8b5814f39a1f82f56b6b6b Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Mon, 25 Feb 2013 17:43:25 +0000 Subject: [PATCH 167/261] drivers/tty/hvc: Use strlcpy instead of strncpy when strlen pi->location_code is larger than HVCS_CLC_LENGTH + 1, original implementation can not let hvcsd->p_location_code NUL terminated. 
so need fix it (also can simplify the code) Signed-off-by: Chen Gang Signed-off-by: Benjamin Herrenschmidt --- drivers/tty/hvc/hvcs.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/tty/hvc/hvcs.c b/drivers/tty/hvc/hvcs.c index 1956593ee89d..81e939e90c4c 100644 --- a/drivers/tty/hvc/hvcs.c +++ b/drivers/tty/hvc/hvcs.c @@ -881,17 +881,12 @@ static struct vio_driver hvcs_vio_driver = { /* Only called from hvcs_get_pi please */ static void hvcs_set_pi(struct hvcs_partner_info *pi, struct hvcs_struct *hvcsd) { - int clclength; - hvcsd->p_unit_address = pi->unit_address; hvcsd->p_partition_ID = pi->partition_ID; - clclength = strlen(&pi->location_code[0]); - if (clclength > HVCS_CLC_LENGTH) - clclength = HVCS_CLC_LENGTH; /* copy the null-term char too */ - strncpy(&hvcsd->p_location_code[0], - &pi->location_code[0], clclength + 1); + strlcpy(&hvcsd->p_location_code[0], + &pi->location_code[0], sizeof(hvcsd->p_location_code)); } /* From 6a404806dfceba9b154015705a9e647fa7749fd8 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 27 Feb 2013 10:45:52 +0000 Subject: [PATCH 168/261] powerpc: Avoid link stack corruption in MMU on syscall entry path Currently we use the link register to branch up high in the early MMU on syscall entry path. Unfortunately, this trashes the link stack as the address we are going to is not associated with the earlier mflr. This patch simply converts us to used the count register (volatile over syscalls anyway) instead. This is much better at predicting in this scenario and doesn't trash link stack causing a bunch of additional branch mispredicts later. Benchmarking this on POWER8 saves a bunch of cycles on Anton's null syscall benchmark here: http://ozlabs.org/~anton/junkcode/null_syscall.c Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64s.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index a8a5361fb70c..87ef8f5ee5bc 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -74,13 +74,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ mflr r10 ; \ ld r12,PACAKBASE(r13) ; \ LOAD_HANDLER(r12, system_call_entry_direct) ; \ - mtlr r12 ; \ + mtctr r12 ; \ mfspr r12,SPRN_SRR1 ; \ /* Re-use of r13... No spare regs to do this */ \ li r13,MSR_RI ; \ mtmsrd r13,1 ; \ GET_PACA(r13) ; /* get r13 back */ \ - blr ; + bctr ; #else /* We can branch directly */ #define SYSCALL_PSERIES_2_DIRECT \ From a74f350b5ce6d8dd1847aa1191ea177fff025567 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sat, 2 Mar 2013 04:06:30 +0000 Subject: [PATCH 169/261] powerpc: Remove unused BITOP_LE_SWIZZLE macro The BITOP_LE_SWIZZLE macro was used in the little-endian bitops functions for powerpc. But these functions were converted to generic bitops and the BITOP_LE_SWIZZLE is not used anymore. 
Signed-off-by: Akinobu Mita Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/bitops.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index ef918a2328bb..08bd299c75b1 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -52,8 +52,6 @@ #define smp_mb__before_clear_bit() smp_mb() #define smp_mb__after_clear_bit() smp_mb() -#define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) - /* Macro for generating the ***_bits() functions */ #define DEFINE_BITOP(fn, op, prefix, postfix) \ static __inline__ void fn(unsigned long mask, \ From 8170a83f15eeca9a84ff895f1a89b58918425a3f Mon Sep 17 00:00:00 2001 From: Tony Breeds Date: Mon, 4 Mar 2013 15:57:30 +0000 Subject: [PATCH 170/261] powerpc: Wireup the kcmp syscall to sys_ni Since kmp takes 2 unsigned long args there should be a compat wrapper. Since one isn't provided I think it's safer just to hook this up to not implemented. If we need it later we can do it properly then. Signed-off-by: Tony Breeds Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/systbl.h | 1 + arch/powerpc/include/asm/unistd.h | 2 +- arch/powerpc/include/uapi/asm/unistd.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 535b6d8a41cc..ebbec52d21bd 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -358,3 +358,4 @@ SYSCALL_SPU(setns) COMPAT_SYS(process_vm_readv) COMPAT_SYS(process_vm_writev) SYSCALL(finit_module) +SYSCALL(ni_syscall) /* sys_kcmp */ diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index f25b5c45c435..1487f0f12293 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include -#define __NR_syscalls 354 +#define __NR_syscalls 355 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index 8c478c6c6b1e..74cb4d72d673 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -376,6 +376,7 @@ #define __NR_process_vm_readv 351 #define __NR_process_vm_writev 352 #define __NR_finit_module 353 +#define __NR_kcmp 354 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ From 57d231678ace658b3a73a0d144cfebbd4257bc0e Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 4 Mar 2013 19:45:50 +0000 Subject: [PATCH 171/261] powerpc: Fix setting FSCR for HV=0 and on secondary CPUs Currently we only set the FSCR (Facility Status and Control Register) when HV=1 but this feature is available when HV=0 also. This patch sets FSCR when HV=0. Also, we currently only set the FSCR on the master CPU. This patch also sets the FSCR on secondary CPUs. 
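The underlying rule is that facility-enable state is per CPU: it must be programmed on the boot CPU and again on every secondary as it comes online, independent of HV mode, or the secondaries take a facility-unavailable exception on first use. A toy userspace sketch of that rule (plain C standing in for the real cpu_setup_power.S assembly; the bit positions mirror the FSCR_TAR/FSCR_DSCR definitions added in the next patch):

#include <stdio.h>

#define FSCR_TAR   (1u << 8)   /* mirrors 1 << (63-55) */
#define FSCR_DSCR  (1u << 2)   /* mirrors 1 << (63-61) */

static unsigned int fscr[4];   /* stand-in for the per-CPU register */

/* Facility setup shared by the boot CPU and every secondary as it is
 * brought online, and run regardless of HV mode - the point of the fix. */
static void init_fscr(int cpu)
{
    fscr[cpu] |= FSCR_TAR | FSCR_DSCR;
}

int main(void)
{
    int cpu;

    for (cpu = 0; cpu < 4; cpu++)
        init_fscr(cpu);         /* boot CPU and secondaries alike */

    printf("cpu0 FSCR = %#x\n", fscr[0]);
    return 0;
}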
Signed-off-by: Michael Neuling cc: Ian Munsie Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/cpu_setup_power.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index d29facbf9a28..bb2d2034d61f 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -48,6 +48,7 @@ _GLOBAL(__restore_cpu_power7) _GLOBAL(__setup_cpu_power8) mflr r11 + bl __init_FSCR bl __init_hvmode_206 mtlr r11 beqlr @@ -56,13 +57,13 @@ _GLOBAL(__setup_cpu_power8) mfspr r3,SPRN_LPCR oris r3, r3, LPCR_AIL_3@h bl __init_LPCR - bl __init_FSCR bl __init_TLB mtlr r11 blr _GLOBAL(__restore_cpu_power8) mflr r11 + bl __init_FSCR mfmsr r3 rldicl. r0,r3,4,63 beqlr From fa759e9b0984748cf626aac59bca60bdab42c644 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 4 Mar 2013 19:45:51 +0000 Subject: [PATCH 172/261] powerpc: Add DSCR FSCR register bit definition This sets the DSCR (Data Stream Control Register) in the FSCR (Facility Status & Control Register). Also harmonise TAR (Target Address Register) FSCR bit definition too. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/reg.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index e66586122030..c9c67fc888c9 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -266,7 +266,8 @@ #define SPRN_HSRR0 0x13A /* Hypervisor Save/Restore 0 */ #define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */ #define SPRN_FSCR 0x099 /* Facility Status & Control Register */ -#define FSCR_TAR (1<<8) /* Enable Target Adress Register */ +#define FSCR_TAR (1 << (63-55)) /* Enable Target Address Register */ +#define FSCR_DSCR (1 << (63-61)) /* Enable Data Stream Control Register */ #define SPRN_TAR 0x32f /* Target Address Register */ #define SPRN_LPCR 0x13E /* LPAR Control Register */ #define LPCR_VPM0 (1ul << (63-0)) From 54c9b2253d34e8998e4bff9ac2d7a3ba0b861d52 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 4 Mar 2013 19:45:52 +0000 Subject: [PATCH 173/261] powerpc: Set DSCR bit in FSCR setup We support DSCR (Data Stream Control Register) so we should make sure we set it in the FSCR (Facility Status & Control Register) incase some firmwares don't set it. If we don't set this, we'll take a facility unavailable exception when using the DSCR. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/cpu_setup_power.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index bb2d2034d61f..ea847abb0d0a 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -116,7 +116,7 @@ __init_LPCR: __init_FSCR: mfspr r3,SPRN_FSCR - ori r3,r3,FSCR_TAR + ori r3,r3,FSCR_TAR|FSCR_DSCR mtspr SPRN_FSCR,r3 blr From 0920a48719f1ceefc909387a64f97563848c7854 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Marchesin?= Date: Tue, 29 Jan 2013 19:41:59 -0800 Subject: [PATCH 174/261] drm/i915: Increase the RC6p threshold. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This increases GEN6_RC6p_THRESHOLD from 100000 to 150000. For some reason this avoids the gen6_gt_check_fifodbg.isra warnings and associated GPU lockups, which makes my ivy bridge machine stable. 
Signed-off-by: Stéphane Marchesin Acked-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 61fee7fcdc2c..a1794c6df1bf 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2574,7 +2574,7 @@ static void gen6_enable_rps(struct drm_device *dev) I915_WRITE(GEN6_RC_SLEEP, 0); I915_WRITE(GEN6_RC1e_THRESHOLD, 1000); I915_WRITE(GEN6_RC6_THRESHOLD, 50000); - I915_WRITE(GEN6_RC6p_THRESHOLD, 100000); + I915_WRITE(GEN6_RC6p_THRESHOLD, 150000); I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ /* Check if we are enabling RC6 */ From 15239099d7a7a9ecdc1ccb5b187ae4cda5488ff9 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 5 Mar 2013 09:50:58 +0100 Subject: [PATCH 175/261] drm/i915: enable irqs earlier when resuming We need it to restore the ilk rc6 context, since the gpu wait no requires interrupts. But in general having interrupts around should help in code sanity, since more and more stuff is interrupt driven. This regression has been introduced in commit 3e9605018ab3e333d51cc90fccfde2031886763b Author: Chris Wilson Date: Tue Nov 27 16:22:54 2012 +0000 drm/i915: Rearrange code to only have a single method for waiting upon the ring Like in the driver load code we need to make sure that hotplug interrupts don't cause havoc with our modeset state, hence block them with the existing infrastructure. Again we ignore races where we might loose hotplug interrupts ... Note that the driver load part of the regression has already been fixed in commit 52d7ecedac3f96fb562cb482c139015372728638 Author: Daniel Vetter Date: Sat Dec 1 21:03:22 2012 +0100 drm/i915: reorder setup sequence to have irqs for output setup v2: Add a note to the commit message about which patch fixed the driver load part of the regression. Stable kernels need to backport both patches. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=54691 Cc: stable@vger.kernel.org (for 3.8 only, plese backport 52d7ecedac3f96fb5 first) Cc: Chris Wilson Cc: Mika Kuoppala Reported-and-Tested-by: Ilya Tumaykin Reviewed-by: Chris wilson (v1) Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 2c5ee965e473..0a8eceb75902 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -495,6 +495,7 @@ static int i915_drm_freeze(struct drm_device *dev) intel_modeset_disable(dev); drm_irq_uninstall(dev); + dev_priv->enable_hotplug_processing = false; } i915_save_state(dev); @@ -568,10 +569,20 @@ static int __i915_drm_thaw(struct drm_device *dev) error = i915_gem_init_hw(dev); mutex_unlock(&dev->struct_mutex); + /* We need working interrupts for modeset enabling ... */ + drm_irq_install(dev); + intel_modeset_init_hw(dev); intel_modeset_setup_hw_state(dev, false); - drm_irq_install(dev); + + /* + * ... but also need to make sure that hotplug processing + * doesn't cause havoc. Like in the driver load code we don't + * bother with the tiny race here where we might loose hotplug + * notifications. 
+ * */ intel_hpd_init(dev); + dev_priv->enable_hotplug_processing = true; } intel_opregion_init(dev); From 2069d483b39a603a5f3428a19d3b4ac89aa97f48 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 5 Mar 2013 15:43:39 +0100 Subject: [PATCH 176/261] ALSA: vmaster: Fix slave change notification When a value of a vmaster slave control is changed, the ctl change notification is sometimes ignored. This happens when the master control overrides, e.g. when the corresponding master control is muted. The reason is that slave_put() returns the value of the actual slave put callback, and it doesn't reflect the virtual slave value change. This patch fixes the function just to return 1 whenever a slave value is changed. Cc: Signed-off-by: Takashi Iwai --- sound/core/vmaster.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/core/vmaster.c b/sound/core/vmaster.c index 857586135d18..0097f3619faa 100644 --- a/sound/core/vmaster.c +++ b/sound/core/vmaster.c @@ -213,7 +213,10 @@ static int slave_put(struct snd_kcontrol *kcontrol, } if (!changed) return 0; - return slave_put_val(slave, ucontrol); + err = slave_put_val(slave, ucontrol); + if (err < 0) + return err; + return 1; } static int slave_tlv_cmd(struct snd_kcontrol *kcontrol, From 154ea2893002618bc3f9a1e2d8186c65490968b1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 5 Mar 2013 11:11:26 -0500 Subject: [PATCH 177/261] Btrfs: enforce min_bytes parameter during extent allocation Commit 24542bf7ea5e4fdfdb5157ff544c093fa4dcb536 changed preallocation of extents to cap the max size we try to allocate. It's a valid change, but the extent reservation code is also used by balance, and that can't tolerate a smaller extent being allocated. __btrfs_prealloc_file_range already has a min_size parameter, which is used by relocation to request a specific extent size. This commit adds an extra check to enforce that minimum extent size. Signed-off-by: Chris Mason Reported-by: Stefan Behrens --- fs/btrfs/inode.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ecd9c4cdb0db..13ab4de0a400 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8502,6 +8502,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, struct btrfs_key ins; u64 cur_offset = start; u64 i_size; + u64 cur_bytes; int ret = 0; bool own_trans = true; @@ -8516,8 +8517,9 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, } } - ret = btrfs_reserve_extent(trans, root, - min(num_bytes, 256ULL * 1024 * 1024), + cur_bytes = min(num_bytes, 256ULL * 1024 * 1024); + cur_bytes = max(cur_bytes, min_size); + ret = btrfs_reserve_extent(trans, root, cur_bytes, min_size, 0, *alloc_hint, &ins, 1); if (ret) { if (own_trans) From 44498aea293b37af1d463acd9658cdce1ecdf427 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 22 Feb 2013 17:05:28 -0300 Subject: [PATCH 178/261] drm/i915: also disable south interrupts when handling them From the docs: "IIR can queue up to two interrupt events. When the IIR is cleared, it will set itself again after one clock if a second event was stored." "Only the rising edge of the PCH Display interrupt will cause the North Display IIR (DEIIR) PCH Display Interrupt even bit to be set, so all PCH Display Interrupts, including back to back interrupts, must be cleared before a new PCH Display interrupt can cause DEIIR to be set". 
The current code works fine because we don't get many interrupts, but if we enable the PCH FIFO underrun interrupts we'll start getting so many interrupts that at some point new PCH interrupts won't cause DEIIR to be set. The initial implementation I tried was to turn the code that checks SDEIIR into a loop, but we can still get interrupts even after the loop is done (and before the irq handler finishes), so we have to either disable the interrupts or mask them. In the end I concluded that just disabling the PCH interrupts is enough, you don't even need the loop, so this is what this patch implements. I've tested it and it passes the 2 "PCH FIFO underrun interrupt storms" I can reproduce: the "ironlake_crtc_disable" case and the "wrong watermarks" case. In other words, here's how to reproduce the problem fixed by this patch: 1 - Enable PCH FIFO underrun interrupts (SERR_INT on SNB+) 2 - Boot the machine 3 - While booting we'll get tons of PCH FIFO underrun interrupts 4 - Plug a new monitor 5 - Run xrandr, notice it won't detect the new monitor 6 - Read SDEIIR and notice it's not 0 while DEIIR is 0 Q: Can't we just clear DEIIR before SDEIIR? A: It doesn't work. SDEIIR has to be completely cleared (including the interrupts stored on its back queue) before it can flip DEIIR's bit to 1 again, and even while you're clearing it you'll be getting more and more interrupts. Q: Why does it work by just disabling+enabling the south interrupts? A: Because when we re-enable them, if there's something on the SDEIIR register (maybe an interrupt stored on the queue), the re-enabling will make DEIIR's bit flip to 1, and since we'll already have interrupts enabled we'll get another interrupt, then run our irq handler again to process the "back" interrupts. v2: Even bigger commit message, added code comments. Note that this fixes missed dp aux irqs which have been reported for 3.9-rc1. This regression has been introduced by switching to irq-driven dp aux transactions with commit 9ee32fea5fe810ec06af3a15e4c65478de56d4f5 Author: Daniel Vetter Date: Sat Dec 1 13:53:48 2012 +0100 drm/i915: irq-drive the dp aux communication References: http://www.mail-archive.com/intel-gfx@lists.freedesktop.org/msg18588.html References: https://lkml.org/lkml/2013/2/26/769 Tested-by: Imre Deak Reported-by: Sedat Dilek Reported-by: Linus Torvalds Signed-off-by: Paulo Zanoni [danvet: Pimp commit message with references for the dp aux irq timeout regression this fixes.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 2cd97d1cc920..3c7bb0410b51 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -701,7 +701,7 @@ static irqreturn_t ivybridge_irq_handler(int irq, void *arg) { struct drm_device *dev = (struct drm_device *) arg; drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; - u32 de_iir, gt_iir, de_ier, pm_iir; + u32 de_iir, gt_iir, de_ier, pm_iir, sde_ier; irqreturn_t ret = IRQ_NONE; int i; @@ -711,6 +711,15 @@ static irqreturn_t ivybridge_irq_handler(int irq, void *arg) de_ier = I915_READ(DEIER); I915_WRITE(DEIER, de_ier & ~DE_MASTER_IRQ_CONTROL); + /* Disable south interrupts. 
We'll only write to SDEIIR once, so further + * interrupts will will be stored on its back queue, and then we'll be + * able to process them after we restore SDEIER (as soon as we restore + * it, we'll get an interrupt if SDEIIR still has something to process + * due to its back queue). */ + sde_ier = I915_READ(SDEIER); + I915_WRITE(SDEIER, 0); + POSTING_READ(SDEIER); + gt_iir = I915_READ(GTIIR); if (gt_iir) { snb_gt_irq_handler(dev, dev_priv, gt_iir); @@ -759,6 +768,8 @@ static irqreturn_t ivybridge_irq_handler(int irq, void *arg) I915_WRITE(DEIER, de_ier); POSTING_READ(DEIER); + I915_WRITE(SDEIER, sde_ier); + POSTING_READ(SDEIER); return ret; } @@ -778,7 +789,7 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg) struct drm_device *dev = (struct drm_device *) arg; drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; int ret = IRQ_NONE; - u32 de_iir, gt_iir, de_ier, pm_iir; + u32 de_iir, gt_iir, de_ier, pm_iir, sde_ier; atomic_inc(&dev_priv->irq_received); @@ -787,6 +798,15 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg) I915_WRITE(DEIER, de_ier & ~DE_MASTER_IRQ_CONTROL); POSTING_READ(DEIER); + /* Disable south interrupts. We'll only write to SDEIIR once, so further + * interrupts will will be stored on its back queue, and then we'll be + * able to process them after we restore SDEIER (as soon as we restore + * it, we'll get an interrupt if SDEIIR still has something to process + * due to its back queue). */ + sde_ier = I915_READ(SDEIER); + I915_WRITE(SDEIER, 0); + POSTING_READ(SDEIER); + de_iir = I915_READ(DEIIR); gt_iir = I915_READ(GTIIR); pm_iir = I915_READ(GEN6_PMIIR); @@ -849,6 +869,8 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg) done: I915_WRITE(DEIER, de_ier); POSTING_READ(DEIER); + I915_WRITE(SDEIER, sde_ier); + POSTING_READ(SDEIER); return ret; } From 576cfb404c9cab728e9462ea713f3422679d5cf7 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 4 Mar 2013 21:16:16 +0100 Subject: [PATCH 179/261] x86, smpboot: Remove unused variable The cpuinfo_x86 ptr is unused now. Drop it. Got obsolete by 69fb3676df33 ("x86 idle: remove mwait_idle() and "idle=mwait" cmdline param") removing its only user. [ hpa: fixes gcc warning ] Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1362428180-8865-2-git-send-email-bp@alien8.de Cc: Len Brown Signed-off-by: H. Peter Anvin --- arch/x86/kernel/smpboot.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a6ceaedc396a..9f190a2a00e9 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1365,9 +1365,8 @@ static inline void mwait_play_dead(void) unsigned int eax, ebx, ecx, edx; unsigned int highest_cstate = 0; unsigned int highest_subcstate = 0; - int i; void *mwait_ptr; - struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info); + int i; if (!this_cpu_has(X86_FEATURE_MWAIT)) return; From 82968b7e8d6150fcea0b48488f7bf6fb25e7b099 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 5 Mar 2013 22:59:35 +0800 Subject: [PATCH 180/261] ASoC: wm5102: Apply a SYSCLK patch for later revs Evaluation has identified some performance improvements to the device. 
Signed-off-by: Mark Brown --- sound/soc/codecs/wm5102.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/soc/codecs/wm5102.c b/sound/soc/codecs/wm5102.c index deb109709bf4..cef288cf9998 100644 --- a/sound/soc/codecs/wm5102.c +++ b/sound/soc/codecs/wm5102.c @@ -573,6 +573,13 @@ static const struct reg_default wm5102_sysclk_reva_patch[] = { { 0x025e, 0x0112 }, }; +static const struct reg_default wm5102_sysclk_revb_patch[] = { + { 0x3081, 0x08FE }, + { 0x3083, 0x00ED }, + { 0x30C1, 0x08FE }, + { 0x30C3, 0x00ED }, +}; + static int wm5102_sysclk_ev(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event) { @@ -587,6 +594,10 @@ static int wm5102_sysclk_ev(struct snd_soc_dapm_widget *w, patch = wm5102_sysclk_reva_patch; patch_size = ARRAY_SIZE(wm5102_sysclk_reva_patch); break; + default: + patch = wm5102_sysclk_revb_patch; + patch_size = ARRAY_SIZE(wm5102_sysclk_revb_patch); + break; } switch (event) { From 4ecccd9edd5eb4dd185486e6e593c671484691bc Mon Sep 17 00:00:00 2001 From: "Li, Zhen-Hua" Date: Wed, 6 Mar 2013 10:43:17 +0800 Subject: [PATCH 181/261] iommu, x86: Add DMA remap fault reason The DMA fault reasons in Intel's document range from 1 to 0xD, but in dmar.c fault reason 0xD is not printed out. In this document: "Intel Virtualization Technology for Directed I/O Architecture Specification" http://download.intel.com/technology/computing/vptech/Intel(r)_VT_for_Direct_IO.pdf Chapter 4. Support For Device-IOTLBs Table 6. Unsuccessful Translated Requests The fault reason for 0xD is not listed in the kernel: Present context-entry used to process translation request specifies blocking of Translation Requests (Translation Type (T) field value not equal to 01b). This patch adds reason 0xD as well. Signed-off-by: Li, Zhen-Hua Cc: Joerg Roedel Cc: Donald Dutile Cc: Suresh Siddha Cc: Hannes Reinecke Link: http://lkml.kernel.org/r/1362537797-6034-1-git-send-email-zhen-hual@hp.com Signed-off-by: Ingo Molnar --- drivers/iommu/dmar.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index dc7e478b7e5f..e5cdaf87822c 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1083,6 +1083,7 @@ static const char *dma_remap_fault_reasons[] = "non-zero reserved fields in RTP", "non-zero reserved fields in CTP", "non-zero reserved fields in PTE", + "PCE for translation request specifies blocking", }; static const char *irq_remap_fault_reasons[] = From 60222c0c2b4d813c72296b55f07d46b19ef83e44 Mon Sep 17 00:00:00 2001 From: Patrik Jakobsson Date: Tue, 5 Mar 2013 19:09:37 +0100 Subject: [PATCH 182/261] drm/i915: Fix incorrect definition of ADPA HSYNC and VSYNC bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The disable bits for ADPA HSYNC and VSYNC were mixed up, resulting in suspend becoming standby and vice versa. Fixed by swapping their bit positions.
Reported-by: Ville Syrjälä Signed-off-by: Patrik Jakobsson Reviewed-by: Eric Anholt Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 527b664d3434..848992f67d56 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1613,9 +1613,9 @@ #define ADPA_CRT_HOTPLUG_FORCE_TRIGGER (1<<16) #define ADPA_USE_VGA_HVPOLARITY (1<<15) #define ADPA_SETS_HVPOLARITY 0 -#define ADPA_VSYNC_CNTL_DISABLE (1<<11) +#define ADPA_VSYNC_CNTL_DISABLE (1<<10) #define ADPA_VSYNC_CNTL_ENABLE 0 -#define ADPA_HSYNC_CNTL_DISABLE (1<<10) +#define ADPA_HSYNC_CNTL_DISABLE (1<<11) #define ADPA_HSYNC_CNTL_ENABLE 0 #define ADPA_VSYNC_ACTIVE_HIGH (1<<4) #define ADPA_VSYNC_ACTIVE_LOW 0 From 68d929862e29a8b52a7f2f2f86a0600423b093cd Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Sat, 2 Mar 2013 19:40:17 -0500 Subject: [PATCH 183/261] efi: be more paranoid about available space when creating variables UEFI variables are typically stored in flash. For various reasons, available space is typically not reclaimed immediately upon the deletion of a variable - instead, the system will garbage collect during initialisation after a reboot. Some systems appear to handle this garbage collection extremely poorly, failing if more than 50% of the system flash is in use. This can result in the machine refusing to boot. The safest thing to do for the moment is to forbid writes if they'd end up using more than half of the storage space. We can make this more fine-grained later if we come up with a method for identifying the broken machines. Signed-off-by: Matthew Garrett Cc: Josh Boyer Cc: Signed-off-by: Matt Fleming --- drivers/firmware/efivars.c | 106 +++++++++++++++++++++++---------- 1 file changed, 79 insertions(+), 27 deletions(-) diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 7320bf891706..0d504971ed4c 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -426,6 +426,44 @@ get_var_data(struct efivars *efivars, struct efi_variable *var) return status; } +static efi_status_t +check_var_size_locked(struct efivars *efivars, u32 attributes, + unsigned long size) +{ + u64 storage_size, remaining_size, max_size; + efi_status_t status; + const struct efivar_operations *fops = efivars->ops; + + if (!efivars->ops->query_variable_info) + return EFI_UNSUPPORTED; + + status = fops->query_variable_info(attributes, &storage_size, + &remaining_size, &max_size); + + if (status != EFI_SUCCESS) + return status; + + if (!storage_size || size > remaining_size || size > max_size || + (remaining_size - size) < (storage_size / 2)) + return EFI_OUT_OF_RESOURCES; + + return status; +} + + +static efi_status_t +check_var_size(struct efivars *efivars, u32 attributes, unsigned long size) +{ + efi_status_t status; + unsigned long flags; + + spin_lock_irqsave(&efivars->lock, flags); + status = check_var_size_locked(efivars, attributes, size); + spin_unlock_irqrestore(&efivars->lock, flags); + + return status; +} + static ssize_t efivar_guid_read(struct efivar_entry *entry, char *buf) { @@ -547,11 +585,16 @@ efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count) } spin_lock_irq(&efivars->lock); - status = efivars->ops->set_variable(new_var->VariableName, - &new_var->VendorGuid, - new_var->Attributes, - new_var->DataSize, - new_var->Data); + + status = check_var_size_locked(efivars, new_var->Attributes, +
new_var->DataSize + utf16_strsize(new_var->VariableName, 1024)); + + if (status == EFI_SUCCESS || status == EFI_UNSUPPORTED) + status = efivars->ops->set_variable(new_var->VariableName, + &new_var->VendorGuid, + new_var->Attributes, + new_var->DataSize, + new_var->Data); spin_unlock_irq(&efivars->lock); @@ -702,8 +745,7 @@ static ssize_t efivarfs_file_write(struct file *file, u32 attributes; struct inode *inode = file->f_mapping->host; unsigned long datasize = count - sizeof(attributes); - unsigned long newdatasize; - u64 storage_size, remaining_size, max_size; + unsigned long newdatasize, varsize; ssize_t bytes = 0; if (count < sizeof(attributes)) @@ -722,28 +764,18 @@ static ssize_t efivarfs_file_write(struct file *file, * amounts of memory. Pick a default size of 64K if * QueryVariableInfo() isn't supported by the firmware. */ - spin_lock_irq(&efivars->lock); - if (!efivars->ops->query_variable_info) - status = EFI_UNSUPPORTED; - else { - const struct efivar_operations *fops = efivars->ops; - status = fops->query_variable_info(attributes, &storage_size, - &remaining_size, &max_size); - } - - spin_unlock_irq(&efivars->lock); + varsize = datasize + utf16_strsize(var->var.VariableName, 1024); + status = check_var_size(efivars, attributes, varsize); if (status != EFI_SUCCESS) { if (status != EFI_UNSUPPORTED) return efi_status_to_err(status); - remaining_size = 65536; + if (datasize > 65536) + return -ENOSPC; } - if (datasize > remaining_size) - return -ENOSPC; - data = kmalloc(datasize, GFP_KERNEL); if (!data) return -ENOMEM; @@ -765,6 +797,19 @@ static ssize_t efivarfs_file_write(struct file *file, */ spin_lock_irq(&efivars->lock); + /* + * Ensure that the available space hasn't shrunk below the safe level + */ + + status = check_var_size_locked(efivars, attributes, varsize); + + if (status != EFI_SUCCESS && status != EFI_UNSUPPORTED) { + spin_unlock_irq(&efivars->lock); + kfree(data); + + return efi_status_to_err(status); + } + status = efivars->ops->set_variable(var->var.VariableName, &var->var.VendorGuid, attributes, datasize, @@ -1345,7 +1390,6 @@ static int efi_pstore_write(enum pstore_type_id type, efi_guid_t vendor = LINUX_EFI_CRASH_GUID; struct efivars *efivars = psi->data; int i, ret = 0; - u64 storage_space, remaining_space, max_variable_size; efi_status_t status = EFI_NOT_FOUND; unsigned long flags; @@ -1365,11 +1409,11 @@ static int efi_pstore_write(enum pstore_type_id type, * size: a size of logging data * DUMP_NAME_LEN * 2: a maximum size of variable name */ - status = efivars->ops->query_variable_info(PSTORE_EFI_ATTRIBUTES, - &storage_space, - &remaining_space, - &max_variable_size); - if (status || remaining_space < size + DUMP_NAME_LEN * 2) { + + status = check_var_size_locked(efivars, PSTORE_EFI_ATTRIBUTES, + size + DUMP_NAME_LEN * 2); + + if (status) { spin_unlock_irqrestore(&efivars->lock, flags); *id = part; return -ENOSPC; @@ -1544,6 +1588,14 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj, return -EINVAL; } + status = check_var_size_locked(efivars, new_var->Attributes, + new_var->DataSize + utf16_strsize(new_var->VariableName, 1024)); + + if (status && status != EFI_UNSUPPORTED) { + spin_unlock_irq(&efivars->lock); + return efi_status_to_err(status); + } + /* now *really* create the variable via EFI */ status = efivars->ops->set_variable(new_var->VariableName, &new_var->VendorGuid, From 123abd76edf56c02a76b46d3d673897177ef067b Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 5 Mar 2013 07:40:16 +0000 Subject: [PATCH 184/261] efivars: 
efivarfs_valid_name() should handle pstore syntax Stricter validation was introduced with commit da27a24383b2b ("efivarfs: guid part of filenames are case-insensitive") and commit 47f531e8ba3b ("efivarfs: Validate filenames much more aggressively"), which is necessary for the guid portion of efivarfs filenames, but we don't need to be so strict with the first part, the variable name. The UEFI specification doesn't impose any constraints on variable names other than they be a NULL-terminated string. The above commits caused a regression that resulted in users seeing the following message, $ sudo mount -v /sys/firmware/efi/efivars mount: Cannot allocate memory whenever pstore EFI variables were present in the variable store, since their variable names failed to pass the following check, /* GUID should be right after the first '-' */ if (s - 1 != strchr(str, '-')) as a typical pstore filename is of the form, dump-type0-10-1-. The fix is trivial since the guid portion of the filename is GUID_LEN bytes, we can use (len - GUID_LEN) to ensure the '-' character is where we expect it to be. (The bogus ENOMEM error value will be fixed in a separate patch.) Reported-by: Joseph Yasi Tested-by: Joseph Yasi Reported-by: Lingzhu Xiang Cc: Josh Boyer Cc: Jeremy Kerr Cc: Matthew Garrett Cc: # v3.8 Signed-off-by: Matt Fleming --- drivers/firmware/efivars.c | 4 +- tools/testing/selftests/efivarfs/efivarfs.sh | 59 ++++++++++++++++++++ 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 0d504971ed4c..1b9a6e17e3dc 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -974,8 +974,8 @@ static bool efivarfs_valid_name(const char *str, int len) if (len < GUID_LEN + 2) return false; - /* GUID should be right after the first '-' */ - if (s - 1 != strchr(str, '-')) + /* GUID must be preceded by a '-' */ + if (*(s - 1) != '-') return false; /* diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh index 880cdd5dc63f..77edcdcc016b 100644 --- a/tools/testing/selftests/efivarfs/efivarfs.sh +++ b/tools/testing/selftests/efivarfs/efivarfs.sh @@ -125,6 +125,63 @@ test_open_unlink() ./open-unlink $file } +# test that we can create a range of filenames +test_valid_filenames() +{ + local attrs='\x07\x00\x00\x00' + local ret=0 + + local file_list="abc dump-type0-11-1-1362436005 1234 -" + for f in $file_list; do + local file=$efivarfs_mount/$f-$test_guid + + printf "$attrs\x00" > $file + + if [ ! 
-e $file ]; then + echo "$file could not be created" >&2 + ret=1 + else + rm $file + fi + done + + exit $ret +} + +test_invalid_filenames() +{ + local attrs='\x07\x00\x00\x00' + local ret=0 + + local file_list=" + -1234-1234-1234-123456789abc + foo + foo-bar + -foo- + foo-barbazba-foob-foob-foob-foobarbazfoo + foo------------------------------------- + -12345678-1234-1234-1234-123456789abc + a-12345678=1234-1234-1234-123456789abc + a-12345678-1234=1234-1234-123456789abc + a-12345678-1234-1234=1234-123456789abc + a-12345678-1234-1234-1234=123456789abc + 1112345678-1234-1234-1234-123456789abc" + + for f in $file_list; do + local file=$efivarfs_mount/$f + + printf "$attrs\x00" 2>/dev/null > $file + + if [ -e $file ]; then + echo "Creating $file should have failed" >&2 + rm $file + ret=1 + fi + done + + exit $ret +} + check_prereqs rc=0 @@ -135,5 +192,7 @@ run_test test_create_read run_test test_delete run_test test_zero_size_delete run_test test_open_unlink +run_test test_valid_filenames +run_test test_invalid_filenames exit $rc From feff5dc4f98330d8152b521acc2e18c16712e6c8 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 5 Mar 2013 12:46:30 +0000 Subject: [PATCH 185/261] efivarfs: return accurate error code in efivarfs_fill_super() Joseph was hitting a failure case when mounting efivarfs which resulted in an incorrect error message, $ sudo mount -v /sys/firmware/efi/efivars mount: Cannot allocate memory triggered when efivarfs_valid_name() returned -EINVAL. Make sure we pass accurate return values up the stack if efivarfs_fill_super() fails to build inodes for EFI variables. Reported-by: Joseph Yasi Reported-by: Lingzhu Xiang Cc: Josh Boyer Cc: Jeremy Kerr Cc: Matthew Garrett Cc: # v3.8 Signed-off-by: Matt Fleming --- drivers/firmware/efivars.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 1b9a6e17e3dc..bea32d1ef7d5 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -1163,15 +1163,22 @@ static struct dentry_operations efivarfs_d_ops = { static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name) { + struct dentry *d; struct qstr q; + int err; q.name = name; q.len = strlen(name); - if (efivarfs_d_hash(NULL, NULL, &q)) - return NULL; + err = efivarfs_d_hash(NULL, NULL, &q); + if (err) + return ERR_PTR(err); - return d_alloc(parent, &q); + d = d_alloc(parent, &q); + if (d) + return d; + + return ERR_PTR(-ENOMEM); } static int efivarfs_fill_super(struct super_block *sb, void *data, int silent) @@ -1181,6 +1188,7 @@ static int efivarfs_fill_super(struct super_block *sb, void *data, int silent) struct efivar_entry *entry, *n; struct efivars *efivars = &__efivars; char *name; + int err = -ENOMEM; efivarfs_sb = sb; @@ -1231,8 +1239,10 @@ static int efivarfs_fill_super(struct super_block *sb, void *data, int silent) goto fail_name; dentry = efivarfs_alloc_dentry(root, name); - if (!dentry) + if (IS_ERR(dentry)) { + err = PTR_ERR(dentry); goto fail_inode; + } /* copied by the above to local storage in the dentry. 
*/ kfree(name); @@ -1259,7 +1269,7 @@ fail_inode: fail_name: kfree(name); fail: - return -ENOMEM; + return err; } static struct dentry *efivarfs_mount(struct file_system_type *fs_type, From f40ebd6bcbbd0d30591f42dc16be52b5086a366b Mon Sep 17 00:00:00 2001 From: Patrik Jakobsson Date: Tue, 5 Mar 2013 14:24:48 +0100 Subject: [PATCH 186/261] drm/i915: Turn off hsync and vsync on ADPA when disabling crt According to PRM we need to disable hsync and vsync even though ADPA is disabled. The previous code did infact do the opposite so we fix it. Signed-off-by: Patrik Jakobsson Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56359 Tested-by: max Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_crt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index cfc96878d742..da1f176a40b7 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -88,7 +88,7 @@ static void intel_disable_crt(struct intel_encoder *encoder) u32 temp; temp = I915_READ(crt->adpa_reg); - temp &= ~(ADPA_HSYNC_CNTL_DISABLE | ADPA_VSYNC_CNTL_DISABLE); + temp |= ADPA_HSYNC_CNTL_DISABLE | ADPA_VSYNC_CNTL_DISABLE; temp &= ~ADPA_DAC_ENABLE; I915_WRITE(crt->adpa_reg, temp); } From 907cc908108b16ae87b7165be992511c968159f0 Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Tue, 5 Mar 2013 14:15:27 -0800 Subject: [PATCH 187/261] cpufreq / intel_pstate: Fix intel_pstate_init() error path If cpufreq_register_driver() fails just free memory that has been allocated and return. intel_pstate_exit() function is removed since we are built-in only now there is no reason for a module exit procedure. Reported-by: Konrad Rzeszutek Wilk Signed-off-by: Dirk Brandewie Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 39 ++++++++++------------------------ 1 file changed, 11 insertions(+), 28 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 096fde0ebcb5..46a23b65dfc8 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -747,37 +747,11 @@ static struct cpufreq_driver intel_pstate_driver = { .owner = THIS_MODULE, }; -static void intel_pstate_exit(void) -{ - int cpu; - - sysfs_remove_group(intel_pstate_kobject, - &intel_pstate_attr_group); - debugfs_remove_recursive(debugfs_parent); - - cpufreq_unregister_driver(&intel_pstate_driver); - - if (!all_cpu_data) - return; - - get_online_cpus(); - for_each_online_cpu(cpu) { - if (all_cpu_data[cpu]) { - del_timer_sync(&all_cpu_data[cpu]->timer); - kfree(all_cpu_data[cpu]); - } - } - - put_online_cpus(); - vfree(all_cpu_data); -} -module_exit(intel_pstate_exit); - static int __initdata no_load; static int __init intel_pstate_init(void) { - int rc = 0; + int cpu, rc = 0; const struct x86_cpu_id *id; if (no_load) @@ -802,7 +776,16 @@ static int __init intel_pstate_init(void) intel_pstate_sysfs_expose_params(); return rc; out: - intel_pstate_exit(); + get_online_cpus(); + for_each_online_cpu(cpu) { + if (all_cpu_data[cpu]) { + del_timer_sync(&all_cpu_data[cpu]->timer); + kfree(all_cpu_data[cpu]); + } + } + + put_online_cpus(); + vfree(all_cpu_data); return -ENODEV; } device_initcall(intel_pstate_init); From d3929b832833db870af72479666fa4e4d043e02e Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Tue, 5 Mar 2013 14:15:26 -0800 Subject: [PATCH 188/261] cpufreq / intel_pstate: Do not load on VM that does not report max P state. 
It seems some VMs support the P state MSRs but return zeros. Fail gracefully if we are running in this environment. References: https://bugzilla.redhat.com/show_bug.cgi?id=916833 Reported-by: Josh Boyer Signed-off-by: Dirk Brandewie Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 46a23b65dfc8..f6dd1e761129 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -662,6 +662,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) cpu = all_cpu_data[policy->cpu]; + if (!policy->cpuinfo.max_freq) + return -ENODEV; + intel_pstate_get_min_max(cpu, &min, &max); limits.min_perf_pct = (policy->min * 100) / policy->cpuinfo.max_freq; From b8083f86e8e9b23ad4d75d334c92b601947dfa91 Mon Sep 17 00:00:00 2001 From: Jonathan Austin Date: Mon, 4 Mar 2013 15:17:36 +0100 Subject: [PATCH 189/261] ARM: 7666/1: decompressor: add -mno-single-pic-base for building the decompressor Before jumping to (position independent) C-code from the decompressor's assembler world we set-up the C environment. This setup currently does not set r9, which for arm-none-uclinux-uclibceabi toolchains is by default expected to be the PIC offset base register (IE should point to the beginning of the GOT). Currently, therefore, in order to build working kernels that use the decompressor it is necessary to use an arm-linux-gnueabi toolchain, or similar. uClinux toolchains cause a prefetch abort to occur at the beginning of the decompress_kernel function. This patch allows uClinux toolchains to build bootable zImages by forcing the -mno-single-pic-base option, which ensures that the location of the GOT is re-derived each time it is required, and r9 becomes free for use as a general purpose register. This has a small (4% in instruction terms) advantage over the alternative of setting r9 to point to the GOT before calling into the C-world. Signed-off-by: Jonathan Austin Acked-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/boot/compressed/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 5cad8a6dadb0..afed28e37ea5 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -120,7 +120,7 @@ ORIG_CFLAGS := $(KBUILD_CFLAGS) KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS)) endif -ccflags-y := -fpic -fno-builtin -I$(obj) +ccflags-y := -fpic -mno-single-pic-base -fno-builtin -I$(obj) asflags-y := -Wa,-march=all -DZIMAGE # Supply kernel BSS size to the decompressor via a linker symbol. From 44d6b1fc3e3c6a3af8e599b724972e881c81e1c9 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 5 Mar 2013 03:54:06 +0100 Subject: [PATCH 190/261] ARM: 7667/1: perf: Fix section mismatch on armpmu_init() WARNING: vmlinux.o(.text+0xfb80): Section mismatch in reference from the function armpmu_register() to the function .init.text:armpmu_init() The function armpmu_register() references the function __init armpmu_init(). This is often because armpmu_register lacks a __init annotation or the annotation of armpmu_init is wrong. Just drop the __init marking on armpmu_init() because armpmu_register() no longer has an __init marking. 
Acked-by: Will Deacon Signed-off-by: Stephen Boyd Signed-off-by: Russell King --- arch/arm/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index a89206799db8..146157dfe27c 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -484,7 +484,7 @@ const struct dev_pm_ops armpmu_dev_pm_ops = { SET_RUNTIME_PM_OPS(armpmu_runtime_suspend, armpmu_runtime_resume, NULL) }; -static void __init armpmu_init(struct arm_pmu *armpmu) +static void armpmu_init(struct arm_pmu *armpmu) { atomic_set(&armpmu->active_events, 0); mutex_init(&armpmu->reserve_mutex); From 72d282dc5109e5dc0d963be020604e0cc82f7ed7 Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Tue, 5 Mar 2013 19:25:55 +0000 Subject: [PATCH 191/261] cifs: Fix bug when checking error condition in cifs_rename_pending_delete() Fix check for error condition after setting attributes with CIFSSMBSetFileInfo(). Signed-off-by: Sachin Prabhu Reviewed-by: Jeff Layton Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 83f2606c76d0..e7ae45cd94ef 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1023,7 +1023,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry, current->tgid); /* although we would like to mark the file hidden if that fails we will still try to rename it */ - if (rc != 0) + if (!rc) cifsInode->cifsAttrs = dosattr; else dosattr = origattr; /* since not able to change them */ From c483a9841df1de327e01af7deb6ba349210e5f82 Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Tue, 5 Mar 2013 19:25:56 +0000 Subject: [PATCH 192/261] cifs: Check server capability before attempting silly rename cifs_rename_pending_delete() attempts to silly rename file using CIFSSMBRenameOpenFile(). This uses the SET_FILE_INFORMATION TRANS2 command with information level set to the passthru info-level SMB_SET_FILE_RENAME_INFORMATION. We need to check to make sure that the server support passthru info-levels before attempting the silly rename or else we will fail to rename the file. Signed-off-by: Sachin Prabhu Reviewed-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/inode.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index e7ae45cd94ef..0079696305c9 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -995,6 +995,15 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry, return PTR_ERR(tlink); tcon = tlink_tcon(tlink); + /* + * We cannot rename the file if the server doesn't support + * CAP_INFOLEVEL_PASSTHRU + */ + if (!(tcon->ses->capabilities & CAP_INFOLEVEL_PASSTHRU)) { + rc = -EBUSY; + goto out; + } + rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN, DELETE|FILE_WRITE_ATTRIBUTES, CREATE_NOT_DIR, &netfid, &oplock, NULL, cifs_sb->local_nls, From 25189643a165a5ccad48ee3c116b55905299fd3d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 21 Feb 2013 06:32:59 -0500 Subject: [PATCH 193/261] cifs: remove the sockopt= mount option ...as promised for 3.9. 
Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/connect.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 54125e04fd0c..991c63c6bdd0 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -97,7 +97,7 @@ enum { Opt_user, Opt_pass, Opt_ip, Opt_unc, Opt_domain, Opt_srcaddr, Opt_prefixpath, - Opt_iocharset, Opt_sockopt, + Opt_iocharset, Opt_netbiosname, Opt_servern, Opt_ver, Opt_vers, Opt_sec, Opt_cache, @@ -202,7 +202,6 @@ static const match_table_t cifs_mount_option_tokens = { { Opt_srcaddr, "srcaddr=%s" }, { Opt_prefixpath, "prefixpath=%s" }, { Opt_iocharset, "iocharset=%s" }, - { Opt_sockopt, "sockopt=%s" }, { Opt_netbiosname, "netbiosname=%s" }, { Opt_servern, "servern=%s" }, { Opt_ver, "ver=%s" }, @@ -1752,19 +1751,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, */ cFYI(1, "iocharset set to %s", string); break; - case Opt_sockopt: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - - if (strnicmp(string, "TCP_NODELAY", 11) == 0) { - printk(KERN_WARNING "CIFS: the " - "sockopt=TCP_NODELAY option has been " - "deprecated and will be removed " - "in 3.9\n"); - vol->sockopt_tcp_nodelay = 1; - } - break; case Opt_netbiosname: string = match_strdup(args); if (string == NULL) From 94e18007688a13e5da1a2f1b7f52f15cc56c9f5e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 4 Mar 2013 15:18:25 -0500 Subject: [PATCH 194/261] cifs: don't try to unlock pagecache page after releasing it We had a recent fix to fix the release of pagecache pages when cifs_writev_requeue writes fail. Unfortunately, it releases the page before trying to unlock it. At that point, the page might be gone by the time the unlock comes in. Unlock the page first before checking the value of "rc", and only then end writeback and release the pages. The page lock isn't required for any of those operations so this should be safe. 
Reported-by: Anton Altaparmakov Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 7353bc5d73d7..8e2e799e7a24 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1909,12 +1909,12 @@ cifs_writev_requeue(struct cifs_writedata *wdata) } while (rc == -EAGAIN); for (i = 0; i < wdata->nr_pages; i++) { + unlock_page(wdata->pages[i]); if (rc != 0) { SetPageError(wdata->pages[i]); end_page_writeback(wdata->pages[i]); page_cache_release(wdata->pages[i]); } - unlock_page(wdata->pages[i]); } mapping_set_error(inode->i_mapping, rc); From 067785c40e52089993757afa28988c05f3cb2694 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 6 Mar 2013 19:38:36 +0400 Subject: [PATCH 195/261] CIFS: Fix missing of oplock_read value in smb30_values structure Cc: stable@vger.kernel.org Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index c9c7aa7ed966..bceffe7b8f8d 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -744,4 +744,5 @@ struct smb_version_values smb30_values = { .cap_unix = 0, .cap_nt_find = SMB2_NT_FIND, .cap_large_files = SMB2_LARGE_FILES, + .oplock_read = SMB2_OPLOCK_LEVEL_II, }; From 3a01aa7a25274308fe813a6237f678aed901cea3 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 6 Mar 2013 01:57:55 -0700 Subject: [PATCH 196/261] Btrfs: fix a mismerge in btrfs_balance() Raid56 merge (merge commit e942f88) had mistakenly removed a call to __cancel_balance(), which resulted in balance not cleaning up after itself after a successful finish. (Cleanup includes switching the state, removing the balance item and releasing mut_ex_op testnset lock.) Bring it back. Reported-by: David Sterba Signed-off-by: Ilya Dryomov Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9ff454df6756..6b9cff42265d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3235,6 +3235,11 @@ int btrfs_balance(struct btrfs_balance_control *bctl, update_ioctl_balance_args(fs_info, 0, bargs); } + if ((ret && ret != -ECANCELED && ret != -ENOSPC) || + balance_need_close(fs_info)) { + __cancel_balance(fs_info); + } + wake_up(&fs_info->balance_wait_q); return ret; From 015221fefbc93689dd47508a66326556adf2abcd Mon Sep 17 00:00:00 2001 From: Krzysztof Mazur Date: Sun, 3 Mar 2013 00:14:42 +0100 Subject: [PATCH 197/261] x86: Fix 32-bit *_cpu_data initializers The commit 27be457000211a6903968dfce06d5f73f051a217 ('x86 idle: remove 32-bit-only "no-hlt" parameter, hlt_works_ok flag') removed the hlt_works_ok flag from struct cpuinfo_x86, but boot_cpu_data and new_cpu_data initializers were not changed causing setting f00f_bug flag, instead of fdiv_bug. If CONFIG_X86_F00F_BUG is not set the f00f_bug flag is never cleared. To avoid such problems in future C99-style initialization is now used. Signed-off-by: Krzysztof Mazur Acked-by: Borislav Petkov Cc: len.brown@intel.com Link: http://lkml.kernel.org/r/1362266082-2227-1-git-send-email-krzysiek@podlesie.net Signed-off-by: Ingo Molnar Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/setup.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 84d32855f65c..90d8cc930f5e 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -171,9 +171,15 @@ static struct resource bss_resource = { #ifdef CONFIG_X86_32 /* cpu data as detected by the assembly code in head.S */ -struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1}; +struct cpuinfo_x86 new_cpu_data __cpuinitdata = { + .wp_works_ok = -1, + .fdiv_bug = -1, +}; /* common cpu data for all cpus */ -struct cpuinfo_x86 boot_cpu_data __read_mostly = {0, 0, 0, 0, -1, 1, 0, 0, -1}; +struct cpuinfo_x86 boot_cpu_data __read_mostly = { + .wp_works_ok = -1, + .fdiv_bug = -1, +}; EXPORT_SYMBOL(boot_cpu_data); unsigned int def_to_bigsmp; From 98e7a989979b185f49e86ddaed2ad6890299d9f0 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 6 Mar 2013 20:18:21 -0800 Subject: [PATCH 198/261] x86, mm: Make sure to find a 2M free block for the first mapped area Henrik reported that his MacAir 3.1 would not boot with | commit 8d57470d8f859635deffe3919d7d4867b488b85a | Date: Fri Nov 16 19:38:58 2012 -0800 | | x86, mm: setup page table in top-down It turns out that we do not calculate real_end properly: we try to get a 2M size with 4K alignment, and later round down to 2M, so we will get less than 2M for the first mapping; in the extreme case it could be only 4K. In Henrik's system the last usable range is [mem 0x7f9db000-0x7fef8fff], which is only (1M-32K). The problem is exposed when EFI booting leaves several holes, which forces the mapping to use PTEs instead, as we only map usable areas. To fix it, just make it 2M aligned, so we are guaranteed to be able to use large pages to map it. Reported-by: Henrik Rydberg Bisected-by: Henrik Rydberg Tested-by: Henrik Rydberg Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/CAE9FiQX4nQ7_1kg5RL_vh56rmcSHXUi1ExrZX7CwED4NGMnHfg@mail.gmail.com Signed-off-by: H. Peter Anvin --- arch/x86/mm/init.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 4903a03ae876..59b7fc453277 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -410,9 +410,8 @@ void __init init_mem_mapping(void) /* the ISA range is always mapped regardless of memory holes */ init_memory_mapping(0, ISA_END_ADDRESS); - /* xen has big range in reserved near end of ram, skip it at first */ - addr = memblock_find_in_range(ISA_END_ADDRESS, end, PMD_SIZE, - PAGE_SIZE); + /* xen has big range in reserved near end of ram, skip it at first.*/ + addr = memblock_find_in_range(ISA_END_ADDRESS, end, PMD_SIZE, PMD_SIZE); real_end = addr + PMD_SIZE; /* step_size need to be small so pgt_buf from BRK could cover it */ From 2e604c0f19dcdd433b3863ffc3da9bc0787ca765 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Wed, 6 Mar 2013 20:23:30 -0800 Subject: [PATCH 199/261] x86: Don't clear efi_info even if the sentinel hits When boot_params->sentinel is set, all we really know is that some undefined set of fields in struct boot_params contain garbage. In the particular case of efi_info, however, there is a private magic for that substructure, so it is generally safe to leave it even if the bootloader is broken. kexec (for which we did the initial analysis) did not initialize this field, but of course all the EFI bootloaders do, and most EFI bootloaders are broken in this respect (and should be fixed.)
Reported-by: Robin Holt Link: http://lkml.kernel.org/r/CA%2B5PVA51-FT14p4CRYKbicykugVb=PiaEycdQ57CK2km_OQuRQ@mail.gmail.com Tested-by: Josh Boyer Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/bootparam_utils.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h index 5b5e9cb774b5..ff808ef4fdb4 100644 --- a/arch/x86/include/asm/bootparam_utils.h +++ b/arch/x86/include/asm/bootparam_utils.h @@ -14,13 +14,15 @@ * analysis of kexec-tools; if other broken bootloaders initialize a * different set of fields we will need to figure out how to disambiguate. * + * Note: efi_info is commonly left uninitialized, but that field has a + * private magic, so it is better to leave it unchanged. */ static void sanitize_boot_params(struct boot_params *boot_params) { if (boot_params->sentinel) { /*fields in boot_params are not valid, clear them */ memset(&boot_params->olpc_ofw_header, 0, - (char *)&boot_params->alt_mem_k - + (char *)&boot_params->efi_info - (char *)&boot_params->olpc_ofw_header); memset(&boot_params->kbd_status, 0, (char *)&boot_params->hdr - From 3c4aff6b9a183b4f24eb7b8dd6c8a92cdba3bc75 Mon Sep 17 00:00:00 2001 From: Peter Jones Date: Wed, 6 Mar 2013 13:00:23 -0500 Subject: [PATCH 200/261] x86, doc: Be explicit about what the x86 struct boot_params requires If the sentinel triggers, we do not want the boot loader authors to just poke it and make the error go away, we want them to actually fix the problem. This should help avoid making the incorrect change in non-compliant bootloaders. [ hpa: dropped the Documentation/x86/boot.txt hunk pending clarifications ] Signed-off-by: Peter Jones Link: http://lkml.kernel.org/r/1362592823-28967-1-git-send-email-pjones@redhat.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/bootparam_utils.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h index ff808ef4fdb4..653668d140f9 100644 --- a/arch/x86/include/asm/bootparam_utils.h +++ b/arch/x86/include/asm/bootparam_utils.h @@ -19,8 +19,22 @@ */ static void sanitize_boot_params(struct boot_params *boot_params) { + /* + * IMPORTANT NOTE TO BOOTLOADER AUTHORS: do not simply clear + * this field. The purpose of this field is to guarantee + * compliance with the x86 boot spec located in + * Documentation/x86/boot.txt . That spec says that the + * *whole* structure should be cleared, after which only the + * portion defined by struct setup_header (boot_params->hdr) + * should be copied in. + * + * If you're having an issue because the sentinel is set, you + * need to change the whole structure to be cleared, not this + * (or any other) individual field, or you will soon have + * problems again. + */ if (boot_params->sentinel) { - /*fields in boot_params are not valid, clear them */ + /* fields in boot_params are left uninitialized, clear them */ memset(&boot_params->olpc_ofw_header, 0, (char *)&boot_params->efi_info - (char *)&boot_params->olpc_ofw_header); From 25336e8ae2d2fa64c9c4cc2c9c28f641134c9fa9 Mon Sep 17 00:00:00 2001 From: Mengdong Lin Date: Thu, 7 Mar 2013 14:10:25 -0500 Subject: [PATCH 201/261] ALSA: hda - check NULL pointer when creating SPDIF controls If the SPDIF control array cannot be reallocated, the function will return to avoid dereferencing a NULL pointer. 
Signed-off-by: Mengdong Lin Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_codec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 04b57383e8cb..3dc656688c08 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -3334,6 +3334,8 @@ int snd_hda_create_dig_out_ctls(struct hda_codec *codec, return -EBUSY; } spdif = snd_array_new(&codec->spdif_out); + if (!spdif) + return -ENOMEM; for (dig_mix = dig_mixes; dig_mix->name; dig_mix++) { kctl = snd_ctl_new1(dig_mix, codec); if (!kctl) From 4c7a548a70a44269266858f65c3b5fc9c3ace057 Mon Sep 17 00:00:00 2001 From: Mengdong Lin Date: Thu, 7 Mar 2013 14:11:05 -0500 Subject: [PATCH 202/261] ALSA: hda - check NULL pointer when creating SPDIF PCM switch If the new control cannot be created, this function will return to avoid snd_hda_ctl_add dereferencing a NULL control pointer. Signed-off-by: Mengdong Lin Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_codec.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 3dc656688c08..97c68dd24ef5 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -3433,11 +3433,16 @@ static struct snd_kcontrol_new spdif_share_sw = { int snd_hda_create_spdif_share_sw(struct hda_codec *codec, struct hda_multi_out *mout) { + struct snd_kcontrol *kctl; + if (!mout->dig_out_nid) return 0; + + kctl = snd_ctl_new1(&spdif_share_sw, mout); + if (!kctl) + return -ENOMEM; /* ATTENTION: here mout is passed as private_data, instead of codec */ - return snd_hda_ctl_add(codec, mout->dig_out_nid, - snd_ctl_new1(&spdif_share_sw, mout)); + return snd_hda_ctl_add(codec, mout->dig_out_nid, kctl); } EXPORT_SYMBOL_HDA(snd_hda_create_spdif_share_sw); From 3bc085a12d8f9f3e45a4ac0cc24a34abd5b20657 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Thu, 7 Mar 2013 00:13:51 -0500 Subject: [PATCH 203/261] ALSA: hda/ca0132 - Avoid division by zero in dspxfr_one_seg() Move the zero check `hda_frame_size_words == 0' before the modulus `buffer_size_words % hda_frame_size_words'. Also remove the redundant null check `buffer_addx == NULL'. Signed-off-by: Xi Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_ca0132.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index db02c1e96b08..eefc4563b2f9 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -2298,6 +2298,11 @@ static int dspxfr_one_seg(struct hda_codec *codec, hda_frame_size_words = ((sample_rate_div == 0) ? 0 : (num_chans * sample_rate_mul / sample_rate_div)); + if (hda_frame_size_words == 0) { + snd_printdd(KERN_ERR "frmsz zero\n"); + return -EINVAL; + } + buffer_size_words = min(buffer_size_words, (unsigned int)(UC_RANGE(chip_addx, 1) ? 65536 : 32768)); @@ -2308,8 +2313,7 @@ static int dspxfr_one_seg(struct hda_codec *codec, chip_addx, hda_frame_size_words, num_chans, sample_rate_mul, sample_rate_div, buffer_size_words); - if ((buffer_addx == NULL) || (hda_frame_size_words == 0) || - (buffer_size_words < hda_frame_size_words)) { + if (buffer_size_words < hda_frame_size_words) { snd_printdd(KERN_ERR "dspxfr_one_seg:failed\n"); return -EINVAL; } From 84dfd0ac231f69d70e100e712ad5e5f0092ad46b Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Thu, 7 Mar 2013 09:19:38 +0100 Subject: [PATCH 204/261] ALSA: hda - Add support of new codec ALC233 It's compatible with ALC282. 
Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 2d4237bc0d8e..563c24df4d6f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3163,6 +3163,7 @@ static int patch_alc269(struct hda_codec *codec) case 0x10ec0290: spec->codec_variant = ALC269_TYPE_ALC280; break; + case 0x10ec0233: case 0x10ec0282: case 0x10ec0283: spec->codec_variant = ALC269_TYPE_ALC282; @@ -3862,6 +3863,7 @@ static int patch_alc680(struct hda_codec *codec) */ static const struct hda_codec_preset snd_hda_preset_realtek[] = { { .id = 0x10ec0221, .name = "ALC221", .patch = patch_alc269 }, + { .id = 0x10ec0233, .name = "ALC233", .patch = patch_alc269 }, { .id = 0x10ec0260, .name = "ALC260", .patch = patch_alc260 }, { .id = 0x10ec0262, .name = "ALC262", .patch = patch_alc262 }, { .id = 0x10ec0267, .name = "ALC267", .patch = patch_alc268 }, From 9141770548d529b9d32d5b08d59b65ee65afe0d4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 7 Mar 2013 01:08:55 -0800 Subject: [PATCH 205/261] fs: Limit sys_mount to only request filesystem modules (Part 2). Add missing MODULE_ALIAS_FS("ocfs2") how did I miss that? Remove unnecessary MODULE_ALIAS_FS("devpts") devpts can not be modular. Signed-off-by: "Eric W. Biederman" --- fs/devpts/inode.c | 1 - fs/ocfs2/super.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 79b662985efe..073d30b9d1ac 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -510,7 +510,6 @@ static struct file_system_type devpts_fs_type = { .fs_flags = FS_USERNS_MOUNT | FS_USERNS_DEV_MOUNT, #endif }; -MODULE_ALIAS_FS("devpts"); /* * The normal naming convention is simply /dev/pts/; this conforms diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 9b6910dec4ba..01b85165552b 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1266,6 +1266,7 @@ static struct file_system_type ocfs2_fs_type = { .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, .next = NULL }; +MODULE_ALIAS_FS("ocfs2"); static int ocfs2_check_set_options(struct super_block *sb, struct mount_options *options) From de3cb945db4d8eb3b046dc7a5ea89a893372750c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 4 Mar 2013 17:13:31 -0500 Subject: [PATCH 206/261] Btrfs: improve the delayed inode throttling The delayed inode code batches up changes to the btree in hopes of doing them in bulk. As the changes build up, processes kick off worker threads and wait for them to make progress. The current code kicks off an async work queue item for each delayed node, which creates a lot of churn. It also uses a fixed 1 HZ waiting period for the throttle, which allows us to build a lot of pending work and can slow down the commit. This changes us to watch a sequence counter as it is bumped during the operations. We kick off fewer work items and have each work item do more work. 
Signed-off-by: Chris Mason --- fs/btrfs/delayed-inode.c | 155 +++++++++++++++++++++++---------------- fs/btrfs/delayed-inode.h | 2 + 2 files changed, 94 insertions(+), 63 deletions(-) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 0b278b117cbe..14fce27b4780 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -22,8 +22,9 @@ #include "disk-io.h" #include "transaction.h" -#define BTRFS_DELAYED_WRITEBACK 400 -#define BTRFS_DELAYED_BACKGROUND 100 +#define BTRFS_DELAYED_WRITEBACK 512 +#define BTRFS_DELAYED_BACKGROUND 128 +#define BTRFS_DELAYED_BATCH 16 static struct kmem_cache *delayed_node_cache; @@ -494,6 +495,15 @@ static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node, BTRFS_DELAYED_DELETION_ITEM); } +static void finish_one_item(struct btrfs_delayed_root *delayed_root) +{ + int seq = atomic_inc_return(&delayed_root->items_seq); + if ((atomic_dec_return(&delayed_root->items) < + BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0) && + waitqueue_active(&delayed_root->wait)) + wake_up(&delayed_root->wait); +} + static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item) { struct rb_root *root; @@ -512,10 +522,8 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item) rb_erase(&delayed_item->rb_node, root); delayed_item->delayed_node->count--; - if (atomic_dec_return(&delayed_root->items) < - BTRFS_DELAYED_BACKGROUND && - waitqueue_active(&delayed_root->wait)) - wake_up(&delayed_root->wait); + + finish_one_item(delayed_root); } static void btrfs_release_delayed_item(struct btrfs_delayed_item *item) @@ -1056,10 +1064,7 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node) delayed_node->count--; delayed_root = delayed_node->root->fs_info->delayed_root; - if (atomic_dec_return(&delayed_root->items) < - BTRFS_DELAYED_BACKGROUND && - waitqueue_active(&delayed_root->wait)) - wake_up(&delayed_root->wait); + finish_one_item(delayed_root); } } @@ -1304,35 +1309,44 @@ void btrfs_remove_delayed_node(struct inode *inode) btrfs_release_delayed_node(delayed_node); } -struct btrfs_async_delayed_node { - struct btrfs_root *root; - struct btrfs_delayed_node *delayed_node; +struct btrfs_async_delayed_work { + struct btrfs_delayed_root *delayed_root; + int nr; struct btrfs_work work; }; -static void btrfs_async_run_delayed_node_done(struct btrfs_work *work) +static void btrfs_async_run_delayed_root(struct btrfs_work *work) { - struct btrfs_async_delayed_node *async_node; + struct btrfs_async_delayed_work *async_work; + struct btrfs_delayed_root *delayed_root; struct btrfs_trans_handle *trans; struct btrfs_path *path; struct btrfs_delayed_node *delayed_node = NULL; struct btrfs_root *root; struct btrfs_block_rsv *block_rsv; - int need_requeue = 0; + int total_done = 0; - async_node = container_of(work, struct btrfs_async_delayed_node, work); + async_work = container_of(work, struct btrfs_async_delayed_work, work); + delayed_root = async_work->delayed_root; path = btrfs_alloc_path(); if (!path) goto out; - path->leave_spinning = 1; - delayed_node = async_node->delayed_node; +again: + if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND / 2) + goto free_path; + + delayed_node = btrfs_first_prepared_delayed_node(delayed_root); + if (!delayed_node) + goto free_path; + + path->leave_spinning = 1; root = delayed_node->root; trans = btrfs_join_transaction(root); if (IS_ERR(trans)) - goto free_path; + goto release_path; block_rsv = trans->block_rsv; 
trans->block_rsv = &root->fs_info->delayed_block_rsv; @@ -1363,57 +1377,47 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work) * Task1 will sleep until the transaction is commited. */ mutex_lock(&delayed_node->mutex); - if (delayed_node->count) - need_requeue = 1; - else - btrfs_dequeue_delayed_node(root->fs_info->delayed_root, - delayed_node); + btrfs_dequeue_delayed_node(root->fs_info->delayed_root, delayed_node); mutex_unlock(&delayed_node->mutex); trans->block_rsv = block_rsv; btrfs_end_transaction_dmeta(trans, root); btrfs_btree_balance_dirty_nodelay(root); + +release_path: + btrfs_release_path(path); + total_done++; + + btrfs_release_prepared_delayed_node(delayed_node); + if (async_work->nr == 0 || total_done < async_work->nr) + goto again; + free_path: btrfs_free_path(path); out: - if (need_requeue) - btrfs_requeue_work(&async_node->work); - else { - btrfs_release_prepared_delayed_node(delayed_node); - kfree(async_node); - } + wake_up(&delayed_root->wait); + kfree(async_work); } -static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, - struct btrfs_root *root, int all) -{ - struct btrfs_async_delayed_node *async_node; - struct btrfs_delayed_node *curr; - int count = 0; -again: - curr = btrfs_first_prepared_delayed_node(delayed_root); - if (!curr) +static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, + struct btrfs_root *root, int nr) +{ + struct btrfs_async_delayed_work *async_work; + + if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) return 0; - async_node = kmalloc(sizeof(*async_node), GFP_NOFS); - if (!async_node) { - btrfs_release_prepared_delayed_node(curr); + async_work = kmalloc(sizeof(*async_work), GFP_NOFS); + if (!async_work) return -ENOMEM; - } - async_node->root = root; - async_node->delayed_node = curr; - - async_node->work.func = btrfs_async_run_delayed_node_done; - async_node->work.flags = 0; - - btrfs_queue_worker(&root->fs_info->delayed_workers, &async_node->work); - count++; - - if (all || count < 4) - goto again; + async_work->delayed_root = delayed_root; + async_work->work.func = btrfs_async_run_delayed_root; + async_work->work.flags = 0; + async_work->nr = nr; + btrfs_queue_worker(&root->fs_info->delayed_workers, &async_work->work); return 0; } @@ -1424,30 +1428,55 @@ void btrfs_assert_delayed_root_empty(struct btrfs_root *root) WARN_ON(btrfs_first_delayed_node(delayed_root)); } +static int refs_newer(struct btrfs_delayed_root *delayed_root, + int seq, int count) +{ + int val = atomic_read(&delayed_root->items_seq); + + if (val < seq || val >= seq + count) + return 1; + return 0; +} + void btrfs_balance_delayed_items(struct btrfs_root *root) { struct btrfs_delayed_root *delayed_root; + int seq; delayed_root = btrfs_get_delayed_root(root); if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) return; + seq = atomic_read(&delayed_root->items_seq); + if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) { int ret; - ret = btrfs_wq_run_delayed_node(delayed_root, root, 1); + DEFINE_WAIT(__wait); + + ret = btrfs_wq_run_delayed_node(delayed_root, root, 0); if (ret) return; - wait_event_interruptible_timeout( - delayed_root->wait, - (atomic_read(&delayed_root->items) < - BTRFS_DELAYED_BACKGROUND), - HZ); - return; + while (1) { + prepare_to_wait(&delayed_root->wait, &__wait, + TASK_INTERRUPTIBLE); + + if (refs_newer(delayed_root, seq, + BTRFS_DELAYED_BATCH) || + atomic_read(&delayed_root->items) < + BTRFS_DELAYED_BACKGROUND) { + break; + } + if 
(!signal_pending(current)) + schedule(); + else + break; + } + finish_wait(&delayed_root->wait, &__wait); } - btrfs_wq_run_delayed_node(delayed_root, root, 0); + btrfs_wq_run_delayed_node(delayed_root, root, BTRFS_DELAYED_BATCH); } /* Will return 0 or -ENOMEM */ diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 78b6ad0fc669..1d5c5f7abe3e 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -43,6 +43,7 @@ struct btrfs_delayed_root { */ struct list_head prepare_list; atomic_t items; /* for delayed items */ + atomic_t items_seq; /* for delayed items */ int nodes; /* for delayed nodes */ wait_queue_head_t wait; }; @@ -86,6 +87,7 @@ static inline void btrfs_init_delayed_root( struct btrfs_delayed_root *delayed_root) { atomic_set(&delayed_root->items, 0); + atomic_set(&delayed_root->items_seq, 0); delayed_root->nodes = 0; spin_lock_init(&delayed_root->lock); init_waitqueue_head(&delayed_root->wait); From 69a4cfdd444d1fe5c24d29b3a063964ac165d2cd Mon Sep 17 00:00:00 2001 From: Sean Connor Date: Thu, 28 Feb 2013 09:20:00 -0500 Subject: [PATCH 207/261] ALSA: ice1712: Initialize card->private_data properly Set card->private_data in snd_ice1712_create for fixing NULL dereference in snd_ice1712_remove(). Signed-off-by: Sean Connor Cc: Signed-off-by: Takashi Iwai --- sound/pci/ice1712/ice1712.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/ice1712/ice1712.c b/sound/pci/ice1712/ice1712.c index 2ffdc35d5ffd..806407a3973e 100644 --- a/sound/pci/ice1712/ice1712.c +++ b/sound/pci/ice1712/ice1712.c @@ -2594,6 +2594,8 @@ static int snd_ice1712_create(struct snd_card *card, snd_ice1712_proc_init(ice); synchronize_irq(pci->irq); + card->private_data = ice; + err = pci_request_regions(pci, "ICE1712"); if (err < 0) { kfree(ice); From dcd9006b1b053c7b1cebe81333261d4fd492ffeb Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 5 Mar 2013 17:09:00 +0100 Subject: [PATCH 208/261] HID: logitech-dj: do not directly call hid_output_raw_report() during probe hid_output_raw_report() makes a direct call to usb_control_msg(). However, some USB3 boards have shown that the usb device is not ready during the .probe(). This blocks the entire usb device, and the paired mice, keyboards are not functional. The dmesg output is the following: [ 11.912287] logitech-djreceiver 0003:046D:C52B.0003: hiddev0,hidraw0: USB HID v1.11 Device [Logitech USB Receiver] on usb-0000:00:14.0-2/input2 [ 11.912537] logitech-djreceiver 0003:046D:C52B.0003: logi_dj_probe:logi_dj_recv_query_paired_devices error:-32 [ 11.912636] logitech-djreceiver: probe of 0003:046D:C52B.0003 failed with error -32 Relying on the scheduled call to usbhid_submit_report() fixes the problem. 
related bugs: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1072082 https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1039143 https://bugzilla.redhat.com/show_bug.cgi?id=840391 https://bugzilla.kernel.org/show_bug.cgi?id=49781 Reported-and-tested-by: Bob Bowles Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-dj.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index 9500f2f3f8fe..8758f38c948c 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -459,19 +459,25 @@ static int logi_dj_recv_send_report(struct dj_receiver_dev *djrcv_dev, struct dj_report *dj_report) { struct hid_device *hdev = djrcv_dev->hdev; - int sent_bytes; + struct hid_report *report; + struct hid_report_enum *output_report_enum; + u8 *data = (u8 *)(&dj_report->device_index); + int i; - if (!hdev->hid_output_raw_report) { - dev_err(&hdev->dev, "%s:" - "hid_output_raw_report is null\n", __func__); + output_report_enum = &hdev->report_enum[HID_OUTPUT_REPORT]; + report = output_report_enum->report_id_hash[REPORT_ID_DJ_SHORT]; + + if (!report) { + dev_err(&hdev->dev, "%s: unable to find dj report\n", __func__); return -ENODEV; } - sent_bytes = hdev->hid_output_raw_report(hdev, (u8 *) dj_report, - sizeof(struct dj_report), - HID_OUTPUT_REPORT); + for (i = 0; i < report->field[0]->report_count; i++) + report->field[0]->value[i] = data[i]; - return (sent_bytes < 0) ? sent_bytes : 0; + usbhid_submit_report(hdev, report, USB_DIR_OUT); + + return 0; } static int logi_dj_recv_query_paired_devices(struct dj_receiver_dev *djrcv_dev) From 455bd4c430b0c0a361f38e8658a0d6cb469942b5 Mon Sep 17 00:00:00 2001 From: Ivan Djelic Date: Wed, 6 Mar 2013 20:09:27 +0100 Subject: [PATCH 209/261] ARM: 7668/1: fix memset-related crashes caused by recent GCC (4.7.2) optimizations Recent GCC versions (e.g. GCC-4.7.2) perform optimizations based on assumptions about the implementation of memset and similar functions. The current ARM optimized memset code does not return the value of its first argument, as is usually expected from standard implementations. For instance in the following function: void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) { memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter)); waiter->magic = waiter; INIT_LIST_HEAD(&waiter->list); } compiled as: 800554d0 : 800554d0: e92d4008 push {r3, lr} 800554d4: e1a00001 mov r0, r1 800554d8: e3a02010 mov r2, #16 ; 0x10 800554dc: e3a01011 mov r1, #17 ; 0x11 800554e0: eb04426e bl 80165ea0 800554e4: e1a03000 mov r3, r0 800554e8: e583000c str r0, [r3, #12] 800554ec: e5830000 str r0, [r3] 800554f0: e5830004 str r0, [r3, #4] 800554f4: e8bd8008 pop {r3, pc} GCC assumes memset returns the value of pointer 'waiter' in register r0; causing register/memory corruptions. This patch fixes the return value of the assembly version of memset. It adds a 'mov' instruction and merges an additional load+store into existing load/store instructions. For ease of review, here is a breakdown of the patch into 4 simple steps: Step 1 ====== Perform the following substitutions: ip -> r8, then r0 -> ip, and insert 'mov ip, r0' as the first statement of the function. At this point, we have a memset() implementation returning the proper result, but corrupting r8 on some paths (the ones that were using ip). Step 2 ====== Make sure r8 is saved and restored when (! CALGN(1)+0) == 1: save r8: - str lr, [sp, #-4]! 
+ stmfd sp!, {r8, lr} and restore r8 on both exit paths: - ldmeqfd sp!, {pc} @ Now <64 bytes to go. + ldmeqfd sp!, {r8, pc} @ Now <64 bytes to go. (...) tst r2, #16 stmneia ip!, {r1, r3, r8, lr} - ldr lr, [sp], #4 + ldmfd sp!, {r8, lr} Step 3 ====== Make sure r8 is saved and restored when (! CALGN(1)+0) == 0: save r8: - stmfd sp!, {r4-r7, lr} + stmfd sp!, {r4-r8, lr} and restore r8 on both exit paths: bgt 3b - ldmeqfd sp!, {r4-r7, pc} + ldmeqfd sp!, {r4-r8, pc} (...) tst r2, #16 stmneia ip!, {r4-r7} - ldmfd sp!, {r4-r7, lr} + ldmfd sp!, {r4-r8, lr} Step 4 ====== Rewrite register list "r4-r7, r8" as "r4-r8". Signed-off-by: Ivan Djelic Reviewed-by: Nicolas Pitre Signed-off-by: Dirk Behme Signed-off-by: Russell King --- arch/arm/lib/memset.S | 85 ++++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 41 deletions(-) diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S index 650d5923ab83..d912e7397ecc 100644 --- a/arch/arm/lib/memset.S +++ b/arch/arm/lib/memset.S @@ -19,9 +19,9 @@ 1: subs r2, r2, #4 @ 1 do we have enough blt 5f @ 1 bytes to align with? cmp r3, #2 @ 1 - strltb r1, [r0], #1 @ 1 - strleb r1, [r0], #1 @ 1 - strb r1, [r0], #1 @ 1 + strltb r1, [ip], #1 @ 1 + strleb r1, [ip], #1 @ 1 + strb r1, [ip], #1 @ 1 add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) /* * The pointer is now aligned and the length is adjusted. Try doing the @@ -29,10 +29,14 @@ */ ENTRY(memset) - ands r3, r0, #3 @ 1 unaligned? +/* + * Preserve the contents of r0 for the return value. + */ + mov ip, r0 + ands r3, ip, #3 @ 1 unaligned? bne 1b @ 1 /* - * we know that the pointer in r0 is aligned to a word boundary. + * we know that the pointer in ip is aligned to a word boundary. */ orr r1, r1, r1, lsl #8 orr r1, r1, r1, lsl #16 @@ -43,29 +47,28 @@ ENTRY(memset) #if ! CALGN(1)+0 /* - * We need an extra register for this loop - save the return address and - * use the LR + * We need 2 extra registers for this loop - use r8 and the LR */ - str lr, [sp, #-4]! - mov ip, r1 + stmfd sp!, {r8, lr} + mov r8, r1 mov lr, r1 2: subs r2, r2, #64 - stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time. - stmgeia r0!, {r1, r3, ip, lr} - stmgeia r0!, {r1, r3, ip, lr} - stmgeia r0!, {r1, r3, ip, lr} + stmgeia ip!, {r1, r3, r8, lr} @ 64 bytes at a time. + stmgeia ip!, {r1, r3, r8, lr} + stmgeia ip!, {r1, r3, r8, lr} + stmgeia ip!, {r1, r3, r8, lr} bgt 2b - ldmeqfd sp!, {pc} @ Now <64 bytes to go. + ldmeqfd sp!, {r8, pc} @ Now <64 bytes to go. /* * No need to correct the count; we're only testing bits from now on */ tst r2, #32 - stmneia r0!, {r1, r3, ip, lr} - stmneia r0!, {r1, r3, ip, lr} + stmneia ip!, {r1, r3, r8, lr} + stmneia ip!, {r1, r3, r8, lr} tst r2, #16 - stmneia r0!, {r1, r3, ip, lr} - ldr lr, [sp], #4 + stmneia ip!, {r1, r3, r8, lr} + ldmfd sp!, {r8, lr} #else @@ -74,54 +77,54 @@ ENTRY(memset) * whole cache lines at once. 
*/ - stmfd sp!, {r4-r7, lr} + stmfd sp!, {r4-r8, lr} mov r4, r1 mov r5, r1 mov r6, r1 mov r7, r1 - mov ip, r1 + mov r8, r1 mov lr, r1 cmp r2, #96 - tstgt r0, #31 + tstgt ip, #31 ble 3f - and ip, r0, #31 - rsb ip, ip, #32 - sub r2, r2, ip - movs ip, ip, lsl #(32 - 4) - stmcsia r0!, {r4, r5, r6, r7} - stmmiia r0!, {r4, r5} - tst ip, #(1 << 30) - mov ip, r1 - strne r1, [r0], #4 + and r8, ip, #31 + rsb r8, r8, #32 + sub r2, r2, r8 + movs r8, r8, lsl #(32 - 4) + stmcsia ip!, {r4, r5, r6, r7} + stmmiia ip!, {r4, r5} + tst r8, #(1 << 30) + mov r8, r1 + strne r1, [ip], #4 3: subs r2, r2, #64 - stmgeia r0!, {r1, r3-r7, ip, lr} - stmgeia r0!, {r1, r3-r7, ip, lr} + stmgeia ip!, {r1, r3-r8, lr} + stmgeia ip!, {r1, r3-r8, lr} bgt 3b - ldmeqfd sp!, {r4-r7, pc} + ldmeqfd sp!, {r4-r8, pc} tst r2, #32 - stmneia r0!, {r1, r3-r7, ip, lr} + stmneia ip!, {r1, r3-r8, lr} tst r2, #16 - stmneia r0!, {r4-r7} - ldmfd sp!, {r4-r7, lr} + stmneia ip!, {r4-r7} + ldmfd sp!, {r4-r8, lr} #endif 4: tst r2, #8 - stmneia r0!, {r1, r3} + stmneia ip!, {r1, r3} tst r2, #4 - strne r1, [r0], #4 + strne r1, [ip], #4 /* * When we get here, we've got less than 4 bytes to zero. We * may have an unaligned pointer as well. */ 5: tst r2, #2 - strneb r1, [r0], #1 - strneb r1, [r0], #1 + strneb r1, [ip], #1 + strneb r1, [ip], #1 tst r2, #1 - strneb r1, [r0], #1 + strneb r1, [ip], #1 mov pc, lr ENDPROC(memset) From cc9945bf9cac03860b2f7d59882263c965c6e3af Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 26 Feb 2013 16:17:33 -0500 Subject: [PATCH 210/261] drm/radeon: don't set hpd, afmt interrupts when interrupts are disabled Avoids splatter if the interrupt handler is not registered due to acceleration being disabled. Signed-off-by: Alex Deucher Reviewed-by: Jerome Glisse Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_irq_kms.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index 90374dd77960..48f80cd42d8f 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -400,6 +400,9 @@ void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block) { unsigned long irqflags; + if (!rdev->ddev->irq_enabled) + return; + spin_lock_irqsave(&rdev->irq.lock, irqflags); rdev->irq.afmt[block] = true; radeon_irq_set(rdev); @@ -419,6 +422,9 @@ void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block) { unsigned long irqflags; + if (!rdev->ddev->irq_enabled) + return; + spin_lock_irqsave(&rdev->irq.lock, irqflags); rdev->irq.afmt[block] = false; radeon_irq_set(rdev); @@ -438,6 +444,9 @@ void radeon_irq_kms_enable_hpd(struct radeon_device *rdev, unsigned hpd_mask) unsigned long irqflags; int i; + if (!rdev->ddev->irq_enabled) + return; + spin_lock_irqsave(&rdev->irq.lock, irqflags); for (i = 0; i < RADEON_MAX_HPD_PINS; ++i) rdev->irq.hpd[i] |= !!(hpd_mask & (1 << i)); @@ -458,6 +467,9 @@ void radeon_irq_kms_disable_hpd(struct radeon_device *rdev, unsigned hpd_mask) unsigned long irqflags; int i; + if (!rdev->ddev->irq_enabled) + return; + spin_lock_irqsave(&rdev->irq.lock, irqflags); for (i = 0; i < RADEON_MAX_HPD_PINS; ++i) rdev->irq.hpd[i] &= !(hpd_mask & (1 << i)); From e8fc41377f5037ff7a661ea06adc05f1daec1548 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 27 Feb 2013 12:01:58 -0500 Subject: [PATCH 211/261] drm/radeon: add primary dac adj quirk for R200 board vbios values are wrong leading to colors that are too bright. Use the default values instead. 
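The quirk keys off the PCI device ID plus the subsystem vendor/device pair, so only the one known-bad board is affected and every other card keeps using its VBIOS data. In simplified form (the actual check added below lives in radeon_combios_get_primary_dac_info()):

	/* Radeon 9100 (R200) board with a bad primary DAC adj value */
	if (dev->pdev->device == 0x514D &&
	    dev->pdev->subsystem_vendor == 0x174B &&
	    dev->pdev->subsystem_device == 0x7149)
		found = 0;	/* act as if the VBIOS had nothing, use the table defaults */
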
Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_combios.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 3e403bdda58f..78edadc9e86b 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -970,6 +970,15 @@ struct radeon_encoder_primary_dac *radeon_combios_get_primary_dac_info(struct found = 1; } + /* quirks */ + /* Radeon 9100 (R200) */ + if ((dev->pdev->device == 0x514D) && + (dev->pdev->subsystem_vendor == 0x174B) && + (dev->pdev->subsystem_device == 0x7149)) { + /* vbios value is bad, use the default */ + found = 0; + } + if (!found) /* fallback to defaults */ radeon_legacy_get_primary_dac_info_from_table(rdev, p_dac); From d808fc882928bfe3cab87dd960ca28715e461ce4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 28 Feb 2013 10:03:08 -0500 Subject: [PATCH 212/261] drm/radeon: skip MC reset as it's probably not hung The MC is mostly likely busy (e.g., display requests), not hung so no need to reset it. Doing an MC reset is tricky and not particularly reliable. Fixes hangs in certain cases. Reported-by: Josh Boyer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/evergreen.c | 6 ++++++ drivers/gpu/drm/radeon/ni.c | 6 ++++++ drivers/gpu/drm/radeon/r600.c | 6 ++++++ drivers/gpu/drm/radeon/si.c | 6 ++++++ 4 files changed, 24 insertions(+) diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 3c38ea46531c..305a657bf215 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -2438,6 +2438,12 @@ static u32 evergreen_gpu_check_soft_reset(struct radeon_device *rdev) if (tmp & L2_BUSY) reset_mask |= RADEON_RESET_VMC; + /* Skip MC reset as it's mostly likely not hung, just busy */ + if (reset_mask & RADEON_RESET_MC) { + DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask); + reset_mask &= ~RADEON_RESET_MC; + } + return reset_mask; } diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 7cead763be9e..d4c633e12863 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1381,6 +1381,12 @@ static u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev) if (tmp & L2_BUSY) reset_mask |= RADEON_RESET_VMC; + /* Skip MC reset as it's mostly likely not hung, just busy */ + if (reset_mask & RADEON_RESET_MC) { + DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask); + reset_mask &= ~RADEON_RESET_MC; + } + return reset_mask; } diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 6d4b5611daf4..0740db3fcd22 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -1394,6 +1394,12 @@ static u32 r600_gpu_check_soft_reset(struct radeon_device *rdev) if (r600_is_display_hung(rdev)) reset_mask |= RADEON_RESET_DISPLAY; + /* Skip MC reset as it's mostly likely not hung, just busy */ + if (reset_mask & RADEON_RESET_MC) { + DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask); + reset_mask &= ~RADEON_RESET_MC; + } + return reset_mask; } diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 80979ed951eb..9128120da044 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -2284,6 +2284,12 @@ static u32 si_gpu_check_soft_reset(struct radeon_device *rdev) if (tmp & L2_BUSY) reset_mask |= RADEON_RESET_VMC; + /* Skip MC reset as it's mostly likely not hung, just busy */ + if (reset_mask & RADEON_RESET_MC) { + 
DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask); + reset_mask &= ~RADEON_RESET_MC; + } + return reset_mask; } From 774c389fae5e5a78a4aa75f927ab59fa0ff8a0d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 1 Mar 2013 13:40:31 +0100 Subject: [PATCH 213/261] drm/radeon: don't check mipmap alignment if MIP_ADDRESS is FMASK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MIP_ADDRESS state has 2 meanings. If the texture has one sample per pixel, it's a pointer to the mipmap chain. If the texture has multiple samples per pixel, it's a pointer to FMASK, a metadata buffer needed for reading compressed MSAA textures. The mipmap alignment rules do not apply to FMASK. Signed-off-by: Marek Olšák Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/evergreen_cs.c | 2 +- drivers/gpu/drm/radeon/radeon_drv.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 99fb13286fd0..eb8ac315f92f 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -834,7 +834,7 @@ static int evergreen_cs_track_validate_texture(struct radeon_cs_parser *p, __func__, __LINE__, toffset, surf.base_align); return -EINVAL; } - if (moffset & (surf.base_align - 1)) { + if (surf.nsamples <= 1 && moffset & (surf.base_align - 1)) { dev_warn(p->dev, "%s:%d mipmap bo base %ld not aligned with %ld\n", __func__, __LINE__, moffset, surf.base_align); return -EINVAL; diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 167758488ed6..66a7f0fd9620 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -70,9 +70,10 @@ * 2.27.0 - r600-SI: Add CS ioctl support for async DMA * 2.28.0 - r600-eg: Add MEM_WRITE packet support * 2.29.0 - R500 FP16 color clear registers + * 2.30.0 - fix for FMASK texturing */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 29 +#define KMS_DRIVER_MINOR 30 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); From 60f583d56aa515b896a9d94f860f52640c1e8a75 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 7 Mar 2013 08:31:51 -0800 Subject: [PATCH 214/261] x86: Do not try to sync identity map for non-mapped pages kernel_map_sync_memtype() is called from a variety of contexts. The pat.c code that calls it seems to ensure that it is not called for non-ram areas by checking via pat_pagerange_is_ram(). It is important that it only be called on the actual identity map because there *IS* no map to sync for highmem pages, or for memory holes. The ioremap.c uses are not as careful as those from pat.c, and call kernel_map_sync_memtype() on PCI space which is in the middle of the kernel identity map _range_, but is not actually mapped. This patch adds a check to kernel_map_sync_memtype() which probably duplicates some of the checks already in pat.c. But, it is necessary for the ioremap.c uses and shouldn't hurt other callers. I have reproduced this bug and this patch fixes it for me and the original bug reporter: https://lkml.org/lkml/2013/2/5/396 Signed-off-by: Dave Hansen Link: http://lkml.kernel.org/r/20130307163151.D9B58C4E@kernel.stglabs.ibm.com Signed-off-by: Dave Hansen Tested-by: Tetsuo Handa Signed-off-by: H. 
Peter Anvin --- arch/x86/mm/pat.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 2610bd93c896..657438858e83 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -563,6 +563,13 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags) if (base > __pa(high_memory-1)) return 0; + /* + * some areas in the middle of the kernel identity range + * are not mapped, like the PCI space. + */ + if (!page_is_ram(base >> PAGE_SHIFT)) + return 0; + id_sz = (__pa(high_memory-1) <= base + size) ? __pa(high_memory) - base : size; From cc59487a05b1aae6987b4a5d56603ed3e603f82e Mon Sep 17 00:00:00 2001 From: Christopher Harvey Date: Tue, 26 Feb 2013 10:54:22 -0500 Subject: [PATCH 215/261] drm/mgag200: 'fbdev_list' in 'struct mga_fbdev' is not used Signed-off-by: Christopher Harvey Signed-off-by: Dave Airlie --- drivers/gpu/drm/mgag200/mgag200_drv.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.h b/drivers/gpu/drm/mgag200/mgag200_drv.h index 5ea5033eae0a..4d932c46725d 100644 --- a/drivers/gpu/drm/mgag200/mgag200_drv.h +++ b/drivers/gpu/drm/mgag200/mgag200_drv.h @@ -112,7 +112,6 @@ struct mga_framebuffer { struct mga_fbdev { struct drm_fb_helper helper; struct mga_framebuffer mfb; - struct list_head fbdev_list; void *sysram; int size; struct ttm_bo_kmap_obj mapping; From 0ba53171583f86bbcbba951fe172982f7fc3761c Mon Sep 17 00:00:00 2001 From: Christopher Harvey Date: Tue, 26 Feb 2013 10:55:44 -0500 Subject: [PATCH 216/261] drm/mgag200: Reject modes that are too big for VRAM A monitor or a user could request a resolution greater than the available VRAM for the backing framebuffer. This change checks the required framebuffer size against the max VRAM size and rejects modes if they are too big. This change can also remove a mode request passed in via the video= parameter. 
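The required framebuffer size is simply width * height * bytes per pixel, compared against the VRAM the adapter reports. With the default of 32 bpp assumed by the check (illustrative numbers): 1920x1200 needs 1920 * 1200 * 4 = 9,216,000 bytes (about 8.8 MiB) and is rejected on a board exposing 8 MiB of VRAM, whereas 1280x1024 * 4 = 5,242,880 bytes fits. The core of the test added below is:

	if ((mode->hdisplay * mode->vdisplay * (bpp / 8)) > mdev->mc.vram_size)
		return MODE_BAD;
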
Signed-off-by: Christopher Harvey Signed-off-by: Dave Airlie --- drivers/gpu/drm/mgag200/mgag200_mode.c | 27 ++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c index d3d99a28ddef..a274b9906ef8 100644 --- a/drivers/gpu/drm/mgag200/mgag200_mode.c +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c @@ -1406,6 +1406,14 @@ static int mga_vga_get_modes(struct drm_connector *connector) static int mga_vga_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct drm_device *dev = connector->dev; + struct mga_device *mdev = (struct mga_device*)dev->dev_private; + struct mga_fbdev *mfbdev = mdev->mfbdev; + struct drm_fb_helper *fb_helper = &mfbdev->helper; + struct drm_fb_helper_connector *fb_helper_conn = NULL; + int bpp = 32; + int i = 0; + /* FIXME: Add bandwidth and g200se limitations */ if (mode->crtc_hdisplay > 2048 || mode->crtc_hsync_start > 4096 || @@ -1415,6 +1423,25 @@ static int mga_vga_mode_valid(struct drm_connector *connector, return MODE_BAD; } + /* Validate the mode input by the user */ + for (i = 0; i < fb_helper->connector_count; i++) { + if (fb_helper->connector_info[i]->connector == connector) { + /* Found the helper for this connector */ + fb_helper_conn = fb_helper->connector_info[i]; + if (fb_helper_conn->cmdline_mode.specified) { + if (fb_helper_conn->cmdline_mode.bpp_specified) { + bpp = fb_helper_conn->cmdline_mode.bpp; + } + } + } + } + + if ((mode->hdisplay * mode->vdisplay * (bpp/8)) > mdev->mc.vram_size) { + if (fb_helper_conn) + fb_helper_conn->cmdline_mode.specified = false; + return MODE_BAD; + } + return MODE_OK; } From ce495960ff33f96362cf81f0eb7c52d1a89f64be Mon Sep 17 00:00:00 2001 From: Julia Lemire Date: Thu, 7 Mar 2013 10:41:03 -0500 Subject: [PATCH 217/261] drm/mgag200: Bug fix: Renesas board now selects native resolution. Renesas boards were consistently defaulting to the 1024x768 resolution, regardless of the native resolution of the monitor plugged in. It was determined that the EDID of the monitor was not being read. Since the DAC is a shared line, in order to read from or write to it we must take control of the DAC clock. This can be done by setting the proper register to one. This bug fix sets the register MGA1064_GEN_IO_CTL2 to one. The DAC control line can be used to determine whether or not a new monitor has been plugged in. But since the hotplug feature is not one we will support, it has been decided to simply leave the register set to one. 
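For reference, the resulting DDC setup sequence in mgag200_i2c_create() becomes the following; only the first write is new, and the comments are an interpretation of intent rather than text taken from the driver:

	WREG_DAC(MGA1064_GEN_IO_CTL2, 1);	/* take the shared DAC clock so the DDC lines can be used */
	WREG_DAC(MGA1064_GEN_IO_DATA, 0xff);	/* preset the GPIO data bits high */
	WREG_DAC(MGA1064_GEN_IO_CTL, 0);	/* release the lines (open-drain style I2C bit-banging) */
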
Signed-off-by: Julia Lemire Signed-off-by: Dave Airlie --- drivers/gpu/drm/mgag200/mgag200_i2c.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/mgag200/mgag200_i2c.c b/drivers/gpu/drm/mgag200/mgag200_i2c.c index 5a88ec51b513..d3dcf54e6233 100644 --- a/drivers/gpu/drm/mgag200/mgag200_i2c.c +++ b/drivers/gpu/drm/mgag200/mgag200_i2c.c @@ -92,6 +92,7 @@ struct mga_i2c_chan *mgag200_i2c_create(struct drm_device *dev) int ret; int data, clock; + WREG_DAC(MGA1064_GEN_IO_CTL2, 1); WREG_DAC(MGA1064_GEN_IO_DATA, 0xff); WREG_DAC(MGA1064_GEN_IO_CTL, 0); From 13bcf01b33e6e19a7fe7ff396f9ed02803e225ec Mon Sep 17 00:00:00 2001 From: Christopher Harvey Date: Thu, 7 Mar 2013 10:42:25 -0500 Subject: [PATCH 218/261] drm: Documentation typo fixes Signed-off-by: Christopher Harvey Signed-off-by: Dave Airlie --- include/drm/drm_crtc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 8839b3a24660..e3e0d651c6ca 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -443,12 +443,12 @@ struct drm_crtc { * @dpms: set power state (see drm_crtc_funcs above) * @save: save connector state * @restore: restore connector state - * @reset: reset connector after state has been invalidate (e.g. resume) + * @reset: reset connector after state has been invalidated (e.g. resume) * @detect: is this connector active? * @fill_modes: fill mode list for this connector - * @set_property: property for this connector may need update + * @set_property: property for this connector may need an update * @destroy: make object go away - * @force: notify the driver the connector is forced on + * @force: notify the driver that the connector is forced on * * Each CRTC may have one or more connectors attached to it. The functions * below allow the core DRM code to control connectors, enumerate available modes, From 36c1813bb453dd078e49bc5b3c1bf7d13535d9ff Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Tue, 5 Mar 2013 22:07:36 +0100 Subject: [PATCH 219/261] drm/tegra: drop "select DRM_HDMI" Commit ac24c2204a76e5b42aa103bf963ae0eda1b827f3 ("drm/tegra: Use generic HDMI infoframe helpers") added "select DRM_HDMI" to the DRM_TEGRA Kconfig entry. But there is no Kconfig symbol named DRM_HDMI. The select statement for that symbol is a nop. Drop it. What was needed to use HDMI functionality was to select HDMI (which this entry already did through depending on DRM) and to include linux/hdmi.h (which this commit also did). Signed-off-by: Paul Bolle Acked-by: Thierry Reding Signed-off-by: Dave Airlie --- drivers/gpu/drm/tegra/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/drm/tegra/Kconfig index c92955df0658..be1daf7344d3 100644 --- a/drivers/gpu/drm/tegra/Kconfig +++ b/drivers/gpu/drm/tegra/Kconfig @@ -4,7 +4,6 @@ config DRM_TEGRA select DRM_KMS_HELPER select DRM_GEM_CMA_HELPER select DRM_KMS_CMA_HELPER - select DRM_HDMI select FB_CFB_FILLRECT select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT From 7b54c165a0c012edbaeaa73c5c87cb73721eb580 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 8 Mar 2013 09:03:07 -0800 Subject: [PATCH 220/261] vfs: don't BUG_ON() if following a /proc fd pseudo-symlink results in a symlink It's "normal" - it can happen if the file descriptor you followed was opened with O_NOFOLLOW. 
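A minimal userspace sketch of that situation (assumes a symlink named "link" in the current directory; O_PATH is what allows O_NOFOLLOW to open the link itself rather than fail with ELOOP):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/stat.h>
	#include <unistd.h>

	int main(void)
	{
		char proc[64];
		struct stat st;
		/* fd refers to the symlink itself, not to its target */
		int fd = open("link", O_PATH | O_NOFOLLOW);

		if (fd < 0)
			return 1;
		snprintf(proc, sizeof(proc), "/proc/self/fd/%d", fd);
		/* stat() follows the /proc pseudo-symlink and lands on a
		 * symlink inode - the case the BUG_ON() used to reject */
		stat(proc, &st);
		close(fd);
		return 0;
	}
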
Reported-by: Dave Jones Cc: Al Viro Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/namei.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 961bc1268366..57ae9c8c66bf 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -689,8 +689,6 @@ void nd_jump_link(struct nameidata *nd, struct path *path) nd->path = *path; nd->inode = nd->path.dentry->d_inode; nd->flags |= LOOKUP_JUMPED; - - BUG_ON(nd->inode->i_op->follow_link); } static inline void put_link(struct nameidata *nd, struct path *link, void *cookie) From e1082f45f1e2bbf6e25f6b614fc6616ebf709d19 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 8 Mar 2013 12:43:26 -0800 Subject: [PATCH 221/261] ipc: fix potential oops when src msg > 4k w/ MSG_COPY If the src msg is > 4k, then dest->next points to the next allocated segment; resetting it just prior to dereferencing is bad. Signed-off-by: Peter Hurley Acked-by: Stanislav Kinsbursky Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/msgutil.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/ipc/msgutil.c b/ipc/msgutil.c index ebfcbfa8b7f2..5df8e4bf1db0 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c @@ -117,9 +117,6 @@ struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst) if (alen > DATALEN_MSG) alen = DATALEN_MSG; - dst->next = NULL; - dst->security = NULL; - memcpy(dst + 1, src + 1, alen); len -= alen; From 88b9e456b1649722673ffa147914299799dc9041 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 8 Mar 2013 12:43:27 -0800 Subject: [PATCH 222/261] ipc: don't allocate a copy larger than max When MSG_COPY is set, a duplicate message must be allocated for the copy before locking the queue. However, the copy could not be larger than was sent which is limited to msg_ctlmax. Signed-off-by: Peter Hurley Acked-by: Stanislav Kinsbursky Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/msg.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ipc/msg.c b/ipc/msg.c index 950572f9d796..31cd1bf6af27 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -820,15 +820,17 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, struct msg_msg *copy = NULL; unsigned long copy_number = 0; + ns = current->nsproxy->ipc_ns; + if (msqid < 0 || (long) bufsz < 0) return -EINVAL; if (msgflg & MSG_COPY) { - copy = prepare_copy(buf, bufsz, msgflg, &msgtyp, ©_number); + copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax), + msgflg, &msgtyp, ©_number); if (IS_ERR(copy)) return PTR_ERR(copy); } mode = convert_mode(&msgtyp, msgflg); - ns = current->nsproxy->ipc_ns; msq = msg_lock_check(ns, msqid); if (IS_ERR(msq)) { From 5ca3957510b9fc2a14d3647db518014842f9a2b4 Mon Sep 17 00:00:00 2001 From: Hillf Danton Date: Fri, 8 Mar 2013 12:43:28 -0800 Subject: [PATCH 223/261] mm/mempolicy.c: fix wrong sp_node insertion n->end is accessed in sp_insert(). Thus it should be update before calling sp_insert(). This mistake may make kernel panic. 
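The panic comes from the overlap check in sp_insert(): the rb-tree walk compares the new node's [start, end) range against each node already in the tree and hits BUG() when the new range nests inside an existing one. With the old ordering, n_new (covering [end, old n->end)) is inserted while n still covers the whole [n->start, old n->end) range, so the walk falls through to BUG(). Roughly (paraphrased, not the verbatim mempolicy.c code):

	/* for each existing node nd met during the insert walk */
	if (new->start < nd->start)
		p = &(*p)->rb_left;
	else if (new->end > nd->end)
		p = &(*p)->rb_right;
	else
		BUG();		/* new range lies inside an existing node */

Shrinking n->end to start first removes the overlap, so the subsequent insert of n_new is safe.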
Signed-off-by: Hillf Danton Signed-off-by: KOSAKI Motohiro Cc: Sasha Levin Cc: Hugh Dickins Cc: Mel Gorman Cc: Dave Jones Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 31d26637b658..868d08f49f07 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2391,8 +2391,8 @@ restart: *mpol_new = *n->policy; atomic_set(&mpol_new->refcnt, 1); sp_node_init(n_new, n->end, end, mpol_new); - sp_insert(sp, n_new); n->end = start; + sp_insert(sp, n_new); n_new = NULL; mpol_new = NULL; break; From 7880639c3e4fde5953ff243ee52204ddc5af641b Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Fri, 8 Mar 2013 12:43:29 -0800 Subject: [PATCH 224/261] mm/mempolicy.c: fix sp_node_init() argument ordering Currently, n_new is wrongly initialized. start and end parameter are inverted. Let's fix it. Signed-off-by: KOSAKI Motohiro Cc: Hillf Danton Cc: Sasha Levin Cc: Hugh Dickins Cc: Mel Gorman Cc: Dave Jones Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 868d08f49f07..74310017296e 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2390,7 +2390,7 @@ restart: *mpol_new = *n->policy; atomic_set(&mpol_new->refcnt, 1); - sp_node_init(n_new, n->end, end, mpol_new); + sp_node_init(n_new, end, n->end, mpol_new); n->end = start; sp_insert(sp, n_new); n_new = NULL; From 2e1c9b2867656ff9a469d23e1dfe90cf77ec0c72 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 8 Mar 2013 12:43:30 -0800 Subject: [PATCH 225/261] idr: remove WARN_ON_ONCE() on negative IDs idr_find(), idr_remove() and idr_replace() used to silently ignore the sign bit and perform lookup with the rest of the bits. The weird behavior has been changed such that negative IDs are treated as invalid. As the behavior change was subtle, WARN_ON_ONCE() was added in the hope of determining who's calling idr functions with negative IDs so that they can be examined for problems. Up until now, all two reported cases are ID number coming directly from userland and getting fed into idr_find() and the warnings seem to cause more problems than being helpful. Drop the WARN_ON_ONCE()s. Signed-off-by: Tejun Heo Reported-by: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/idr.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/lib/idr.c b/lib/idr.c index 73f4d53c02f3..00739aaf95a2 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -569,8 +569,7 @@ void idr_remove(struct idr *idp, int id) struct idr_layer *p; struct idr_layer *to_free; - /* see comment in idr_find_slowpath() */ - if (WARN_ON_ONCE(id < 0)) + if (id < 0) return; sub_remove(idp, (idp->layers - 1) * IDR_BITS, id); @@ -667,15 +666,7 @@ void *idr_find_slowpath(struct idr *idp, int id) int n; struct idr_layer *p; - /* - * If @id is negative, idr_find() used to ignore the sign bit and - * performed lookup with the rest of bits, which is weird and can - * lead to very obscure bugs. We're now returning NULL for all - * negative IDs but just in case somebody was depending on the sign - * bit being ignored, let's trigger WARN_ON_ONCE() so that they can - * be detected and fixed. WARN_ON_ONCE() can later be removed. 
- */ - if (WARN_ON_ONCE(id < 0)) + if (id < 0) return NULL; p = rcu_dereference_raw(idp->top); @@ -824,8 +815,7 @@ void *idr_replace(struct idr *idp, void *ptr, int id) int n; struct idr_layer *p, *old_p; - /* see comment in idr_find_slowpath() */ - if (WARN_ON_ONCE(id < 0)) + if (id < 0) return ERR_PTR(-EINVAL); p = idp->top; From dc893e19b5800d7743fb58235877bfa9091805ff Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 8 Mar 2013 12:43:31 -0800 Subject: [PATCH 226/261] Revert parts of "hlist: drop the node parameter from iterators" Commit b67bfe0d42ca ("hlist: drop the node parameter from iterators") did a lot of nice changes but also contains two small hunks that seem to have slipped in accidentally and have no apparent connection to the intent of the patch. This reverts the two extraneous changes. Signed-off-by: Arnd Bergmann Cc: Peter Senna Tschudin Cc: Paul E. McKenney Cc: Sasha Levin Cc: Thomas Gleixner Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/smpboot.c | 2 +- net/9p/trans_virtio.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 25d3d8b6e4e1..8eaed9aa9cf0 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -131,7 +131,7 @@ static int smpboot_thread_fn(void *data) continue; } - //BUG_ON(td->cpu != smp_processor_id()); + BUG_ON(td->cpu != smp_processor_id()); /* Check for state change setup */ switch (td->status) { diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 74dea377fe5b..de2e950a0a7a 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -655,7 +655,7 @@ static struct p9_trans_module p9_virtio_trans = { .create = p9_virtio_create, .close = p9_virtio_close, .request = p9_virtio_request, - //.zc_request = p9_virtio_zc_request, + .zc_request = p9_virtio_zc_request, .cancel = p9_virtio_cancel, /* * We leave one entry for input and one entry for response From a40e7cf8f06b4e322ba902e4e9f6a6b0c2daa907 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 8 Mar 2013 12:43:32 -0800 Subject: [PATCH 227/261] dmi_scan: fix missing check for _DMI_ signature in smbios_present() Commit 9f9c9cbb6057 ("drivers/firmware/dmi_scan.c: fetch dmi version from SMBIOS if it exists") hoisted the check for "_DMI_" into dmi_scan_machine(), which means that we don't bother to check for "_DMI_" at offset 16 in an SMBIOS entry. smbios_present() may also call dmi_present() for an address where we found "_SM_", if it failed further validation. Check for "_DMI_" in smbios_present() before calling dmi_present(). 
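The offset 16 comes from the SMBIOS 2.x entry point layout, where the legacy "_DMI_" (intermediate) anchor is embedded inside the "_SM_" structure. A sketch of the relevant fields per the SMBIOS specification (illustrative layout, not kernel code; offsets in bytes):

	struct smbios_entry_point {
		char anchor[4];			/* 0x00: "_SM_" */
		u8   checksum;			/* 0x04 */
		u8   length;			/* 0x05: entry point length */
		u8   major, minor;		/* 0x06, 0x07: SMBIOS version */
		u16  max_structure_size;	/* 0x08 */
		u8   revision;			/* 0x0a */
		u8   formatted[5];		/* 0x0b..0x0f */
		char intermediate_anchor[5];	/* 0x10: "_DMI_" */
		/* intermediate checksum, table length/address/count follow */
	};

So a buffer that matched "_SM_" but fails the "_DMI_" check at p + 16 is not a usable entry point, and smbios_present() now reports failure instead of handing p + 16 to dmi_present() unchecked.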
[akpm@linux-foundation.org: fix build] Signed-off-by: Ben Hutchings Reported-by: Tim McGrath Tested-by: Tim Mcgrath Cc: Zhenzhong Duan Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/firmware/dmi_scan.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 982f1f5f5742..4cd392dbf115 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -442,7 +442,6 @@ static int __init dmi_present(const char __iomem *p) static int __init smbios_present(const char __iomem *p) { u8 buf[32]; - int offset = 0; memcpy_fromio(buf, p, 32); if ((buf[5] < 32) && dmi_checksum(buf, buf[5])) { @@ -461,9 +460,9 @@ static int __init smbios_present(const char __iomem *p) dmi_ver = 0x0206; break; } - offset = 16; + return memcmp(p + 16, "_DMI_", 5) || dmi_present(p + 16); } - return dmi_present(buf + offset); + return 1; } void __init dmi_scan_machine(void) From d8fc16a825eb7780db71268a8502fb3e6af95753 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 8 Mar 2013 12:43:34 -0800 Subject: [PATCH 228/261] ksm: fix m68k build: only NUMA needs pfn_to_nid A CONFIG_DISCONTIGMEM=y m68k config gave mm/ksm.c: In function `get_kpfn_nid': mm/ksm.c:492: error: implicit declaration of function `pfn_to_nid' linux/mmzone.h declares it for CONFIG_SPARSEMEM and CONFIG_FLATMEM, but expects the arch's asm/mmzone.h to declare it for CONFIG_DISCONTIGMEM (see arch/mips/include/asm/mmzone.h for example). Or perhaps it is only expected when CONFIG_NUMA=y: too much of a maze, and m68k got away without it so far, so fix the build in mm/ksm.c. Signed-off-by: Hugh Dickins Reported-by: Geert Uytterhoeven Cc: Petr Holasek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/ksm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/ksm.c b/mm/ksm.c index 85bfd4c16346..b6afe0c440d8 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -489,7 +489,7 @@ out: page = NULL; */ static inline int get_kpfn_nid(unsigned long kpfn) { - return ksm_merge_across_nodes ? 0 : pfn_to_nid(kpfn); + return ksm_merge_across_nodes ? 0 : NUMA(pfn_to_nid(kpfn)); } static void remove_node_from_stable_tree(struct stable_node *stable_node) From 755727b7fb1e0ebe46824159107749cf635d43b1 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 8 Mar 2013 12:43:35 -0800 Subject: [PATCH 229/261] Randy has moved Update email address and CREDITS info. xenotime.net is defunct. Signed-off-by: Randy Dunlap Cc: Harry Wei Cc: Keiichi KII Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- CREDITS | 6 +++--- Documentation/SubmittingPatches | 3 +-- Documentation/printk-formats.txt | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/CREDITS b/CREDITS index 948e0fb9a70e..78163cb3eb6a 100644 --- a/CREDITS +++ b/CREDITS @@ -953,11 +953,11 @@ S: Blacksburg, Virginia 24061 S: USA N: Randy Dunlap -E: rdunlap@xenotime.net -W: http://www.xenotime.net/linux/linux.html -W: http://www.linux-usb.org +E: rdunlap@infradead.org +W: http://www.infradead.org/~rdunlap/ D: Linux-USB subsystem, USB core/UHCI/printer/storage drivers D: x86 SMP, ACPI, bootflag hacking +D: documentation, builds S: (ask for current address) S: USA diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index c379a2a6949f..aa0c1e63f050 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches @@ -60,8 +60,7 @@ own source tree. 
For example: "dontdiff" is a list of files which are generated by the kernel during the build process, and should be ignored in any diff(1)-generated patch. The "dontdiff" file is included in the kernel tree in -2.6.12 and later. For earlier kernel versions, you can get it -from . +2.6.12 and later. Make sure your patch does not include any extra files which do not belong in a patch submission. Make sure to review your patch -after- diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt index e8a6aa473bab..6e953564de03 100644 --- a/Documentation/printk-formats.txt +++ b/Documentation/printk-formats.txt @@ -170,5 +170,5 @@ Reminder: sizeof() result is of type size_t. Thank you for your cooperation and attention. -By Randy Dunlap and +By Randy Dunlap and Andrew Murray From 15cf17d26e08ee95c2e392a3a71f55d32e99e971 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 8 Mar 2013 12:43:36 -0800 Subject: [PATCH 230/261] memcg: initialize kmem-cache destroying work earlier Fix a warning from lockdep caused by calling cancel_work_sync() for uninitialized struct work. This path has been triggered by destructon kmem-cache hierarchy via destroying its root kmem-cache. cache ffff88003c072d80 obj ffff88003b410000 cache ffff88003c072d80 obj ffff88003b924000 cache ffff88003c20bd40 INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. Pid: 2825, comm: insmod Tainted: G O 3.9.0-rc1-next-20130307+ #611 Call Trace: __lock_acquire+0x16a2/0x1cb0 lock_acquire+0x8a/0x120 flush_work+0x38/0x2a0 __cancel_work_timer+0x89/0xf0 cancel_work_sync+0xb/0x10 kmem_cache_destroy_memcg_children+0x81/0xb0 kmem_cache_destroy+0xf/0xe0 init_module+0xcb/0x1000 [kmem_test] do_one_initcall+0x11a/0x170 load_module+0x19b0/0x2320 SyS_init_module+0xc6/0xf0 system_call_fastpath+0x16/0x1b Example module to demonstrate: #include #include #include #include int __init mod_init(void) { int size = 256; struct kmem_cache *cache; void *obj; struct page *page; cache = kmem_cache_create("kmem_cache_test", size, size, 0, NULL); if (!cache) return -ENOMEM; printk("cache %p\n", cache); obj = kmem_cache_alloc(cache, GFP_KERNEL); if (obj) { page = virt_to_head_page(obj); printk("obj %p cache %p\n", obj, page->slab_cache); kmem_cache_free(cache, obj); } flush_scheduled_work(); obj = kmem_cache_alloc(cache, GFP_KERNEL); if (obj) { page = virt_to_head_page(obj); printk("obj %p cache %p\n", obj, page->slab_cache); kmem_cache_free(cache, obj); } kmem_cache_destroy(cache); return -EBUSY; } module_init(mod_init); MODULE_LICENSE("GPL"); Signed-off-by: Konstantin Khlebnikov Cc: Glauber Costa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 53b8201b31eb..2b552224f5cf 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3012,6 +3012,8 @@ void memcg_update_array_size(int num) memcg_limited_groups_array_size = memcg_caches_array_size(num); } +static void kmem_cache_destroy_work_func(struct work_struct *w); + int memcg_update_cache_size(struct kmem_cache *s, int num_groups) { struct memcg_cache_params *cur_params = s->memcg_params; @@ -3031,6 +3033,8 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups) return -ENOMEM; } + INIT_WORK(&s->memcg_params->destroy, + kmem_cache_destroy_work_func); s->memcg_params->is_root_cache = true; /* @@ -3078,6 +3082,8 @@ int memcg_register_cache(struct 
mem_cgroup *memcg, struct kmem_cache *s, if (!s->memcg_params) return -ENOMEM; + INIT_WORK(&s->memcg_params->destroy, + kmem_cache_destroy_work_func); if (memcg) { s->memcg_params->memcg = memcg; s->memcg_params->root_cache = root_cache; @@ -3358,8 +3364,6 @@ static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg) list_for_each_entry(params, &memcg->memcg_slab_caches, list) { cachep = memcg_params_to_cache(params); cachep->memcg_params->dead = true; - INIT_WORK(&cachep->memcg_params->destroy, - kmem_cache_destroy_work_func); schedule_work(&cachep->memcg_params->destroy); } mutex_unlock(&memcg->slab_caches_mutex); From c3d6b628395fe6ec3442a83ddf02334c54867d43 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 8 Mar 2013 12:43:37 -0800 Subject: [PATCH 231/261] alpha: boot: fix build breakage introduced by system.h disintegration Commit ec2212088c42 ("Disintegrate asm/system.h for Alpha") removed the system.h include from boot/head.S, which puts the PAL_* asm constants out of scope. Include so we can get building again. Signed-off-by: Will Deacon Cc: David Rusling Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/boot/head.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/alpha/boot/head.S b/arch/alpha/boot/head.S index b06812bcac83..8efb26686d47 100644 --- a/arch/alpha/boot/head.S +++ b/arch/alpha/boot/head.S @@ -4,6 +4,7 @@ * initial bootloader stuff.. */ +#include .set noreorder .globl __start From 22dfab7fd7fd5a8a2c5556ca0a8fd35fc959abc8 Mon Sep 17 00:00:00 2001 From: Daniel Kurtz Date: Thu, 7 Mar 2013 19:43:33 -0800 Subject: [PATCH 232/261] Input: atmel_mxt_ts - Support for touchpad variant This same driver can be used by atmel based touchscreens and touchpads (buttonpads). Platform data may specify a device is a touchpad using the is_tp flag. This will cause the driver to perform some touchpad specific initializations, such as: * register input device name "Atmel maXTouch Touchpad" instead of Touchscreen. * register BTN_LEFT & BTN_TOOL_* event types. * register axis resolution (as a fixed constant, for now) * register BUTTONPAD property * process GPIO buttons using reportid T19 Input event GPIO mapping is done by the platform data key_map array. key_map[x] should contain the KEY or BTN code to send when processing GPIOx from T19. To specify a GPIO as not an input source, populate with KEY_RESERVED, or 0. 
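As an example of the new platform data, a buttonpad with the physical click wired to T19 GPIO3 would describe it like this (excerpt of a hypothetical board file; omitted fields are zero/default, and the follow-up chromeos_laptop patch uses the same shape):

	static struct mxt_platform_data example_tp_pdata = {
		.is_tp   = true,
		/* GPIO0..GPIO2 carry no input events, GPIO3 is the click */
		.key_map = { KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, BTN_LEFT },
	};
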
Signed-off-by: Daniel Kurtz Signed-off-by: Benson Leung Signed-off-by: Nick Dyer Tested-by: Olof Johansson Signed-off-by: Linus Torvalds --- drivers/input/touchscreen/atmel_mxt_ts.c | 64 +++++++++++++++++++++++- include/linux/i2c/atmel_mxt_ts.h | 5 ++ 2 files changed, 67 insertions(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index d04f810cb1dd..8ed279c56d27 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -181,6 +181,12 @@ #define MXT_FWRESET_TIME 175 /* msec */ +/* MXT_SPT_GPIOPWM_T19 field */ +#define MXT_GPIO0_MASK 0x04 +#define MXT_GPIO1_MASK 0x08 +#define MXT_GPIO2_MASK 0x10 +#define MXT_GPIO3_MASK 0x20 + /* Command to unlock bootloader */ #define MXT_UNLOCK_CMD_MSB 0xaa #define MXT_UNLOCK_CMD_LSB 0xdc @@ -212,6 +218,8 @@ /* Touchscreen absolute values */ #define MXT_MAX_AREA 0xff +#define MXT_PIXELS_PER_MM 20 + struct mxt_info { u8 family_id; u8 variant_id; @@ -243,6 +251,8 @@ struct mxt_data { const struct mxt_platform_data *pdata; struct mxt_object *object_table; struct mxt_info info; + bool is_tp; + unsigned int irq; unsigned int max_x; unsigned int max_y; @@ -251,6 +261,7 @@ struct mxt_data { u8 T6_reportid; u8 T9_reportid_min; u8 T9_reportid_max; + u8 T19_reportid; }; static bool mxt_object_readable(unsigned int type) @@ -502,6 +513,21 @@ static int mxt_write_object(struct mxt_data *data, return mxt_write_reg(data->client, reg + offset, val); } +static void mxt_input_button(struct mxt_data *data, struct mxt_message *message) +{ + struct input_dev *input = data->input_dev; + bool button; + int i; + + /* Active-low switch */ + for (i = 0; i < MXT_NUM_GPIO; i++) { + if (data->pdata->key_map[i] == KEY_RESERVED) + continue; + button = !(message->message[0] & MXT_GPIO0_MASK << i); + input_report_key(input, data->pdata->key_map[i], button); + } +} + static void mxt_input_touchevent(struct mxt_data *data, struct mxt_message *message, int id) { @@ -585,6 +611,9 @@ static irqreturn_t mxt_interrupt(int irq, void *dev_id) int id = reportid - data->T9_reportid_min; mxt_input_touchevent(data, &message, id); update_input = true; + } else if (message.reportid == data->T19_reportid) { + mxt_input_button(data, &message); + update_input = true; } else { mxt_dump_message(dev, &message); } @@ -764,6 +793,9 @@ static int mxt_get_object_table(struct mxt_data *data) data->T9_reportid_min = min_id; data->T9_reportid_max = max_id; break; + case MXT_SPT_GPIOPWM_T19: + data->T19_reportid = min_id; + break; } } @@ -777,7 +809,7 @@ static void mxt_free_object_table(struct mxt_data *data) data->T6_reportid = 0; data->T9_reportid_min = 0; data->T9_reportid_max = 0; - + data->T19_reportid = 0; } static int mxt_initialize(struct mxt_data *data) @@ -1115,9 +1147,13 @@ static int mxt_probe(struct i2c_client *client, goto err_free_mem; } - input_dev->name = "Atmel maXTouch Touchscreen"; + data->is_tp = pdata && pdata->is_tp; + + input_dev->name = (data->is_tp) ? 
"Atmel maXTouch Touchpad" : + "Atmel maXTouch Touchscreen"; snprintf(data->phys, sizeof(data->phys), "i2c-%u-%04x/input0", client->adapter->nr, client->addr); + input_dev->phys = data->phys; input_dev->id.bustype = BUS_I2C; @@ -1140,6 +1176,29 @@ static int mxt_probe(struct i2c_client *client, __set_bit(EV_KEY, input_dev->evbit); __set_bit(BTN_TOUCH, input_dev->keybit); + if (data->is_tp) { + int i; + __set_bit(INPUT_PROP_POINTER, input_dev->propbit); + __set_bit(INPUT_PROP_BUTTONPAD, input_dev->propbit); + + for (i = 0; i < MXT_NUM_GPIO; i++) + if (pdata->key_map[i] != KEY_RESERVED) + __set_bit(pdata->key_map[i], input_dev->keybit); + + __set_bit(BTN_TOOL_FINGER, input_dev->keybit); + __set_bit(BTN_TOOL_DOUBLETAP, input_dev->keybit); + __set_bit(BTN_TOOL_TRIPLETAP, input_dev->keybit); + __set_bit(BTN_TOOL_QUADTAP, input_dev->keybit); + __set_bit(BTN_TOOL_QUINTTAP, input_dev->keybit); + + input_abs_set_res(input_dev, ABS_X, MXT_PIXELS_PER_MM); + input_abs_set_res(input_dev, ABS_Y, MXT_PIXELS_PER_MM); + input_abs_set_res(input_dev, ABS_MT_POSITION_X, + MXT_PIXELS_PER_MM); + input_abs_set_res(input_dev, ABS_MT_POSITION_Y, + MXT_PIXELS_PER_MM); + } + /* For single touch */ input_set_abs_params(input_dev, ABS_X, 0, data->max_x, 0, 0); @@ -1258,6 +1317,7 @@ static SIMPLE_DEV_PM_OPS(mxt_pm_ops, mxt_suspend, mxt_resume); static const struct i2c_device_id mxt_id[] = { { "qt602240_ts", 0 }, { "atmel_mxt_ts", 0 }, + { "atmel_mxt_tp", 0 }, { "mXT224", 0 }, { } }; diff --git a/include/linux/i2c/atmel_mxt_ts.h b/include/linux/i2c/atmel_mxt_ts.h index f027f7a63511..99e379b74398 100644 --- a/include/linux/i2c/atmel_mxt_ts.h +++ b/include/linux/i2c/atmel_mxt_ts.h @@ -15,6 +15,9 @@ #include +/* For key_map array */ +#define MXT_NUM_GPIO 4 + /* Orient */ #define MXT_NORMAL 0x0 #define MXT_DIAGONAL 0x1 @@ -39,6 +42,8 @@ struct mxt_platform_data { unsigned int voltage; unsigned char orient; unsigned long irqflags; + bool is_tp; + const unsigned int key_map[MXT_NUM_GPIO]; }; #endif /* __LINUX_ATMEL_MXT_TS_H */ From 2ef392042debb86003e3e1d756960a2e53930b60 Mon Sep 17 00:00:00 2001 From: Benson Leung Date: Thu, 7 Mar 2013 19:43:34 -0800 Subject: [PATCH 233/261] Platform: x86: chromeos_laptop : Add basic platform data for atmel devices Add basic platform data to get the current upstream driver working with the 224s touchpad and 1664s touchscreen. We will be using NULL config so we will use the settings from the devices' NVRAMs. 
Signed-off-by: Benson Leung Tested-by: Olof Johansson Signed-off-by: Linus Torvalds --- drivers/platform/x86/chromeos_laptop.c | 41 ++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/chromeos_laptop.c b/drivers/platform/x86/chromeos_laptop.c index 93d66809355a..3e5b4497a1d0 100644 --- a/drivers/platform/x86/chromeos_laptop.c +++ b/drivers/platform/x86/chromeos_laptop.c @@ -23,6 +23,9 @@ #include #include +#include +#include +#include #include #define ATMEL_TP_I2C_ADDR 0x4b @@ -67,15 +70,49 @@ static struct i2c_board_info __initdata tsl2563_als_device = { I2C_BOARD_INFO("tsl2563", TAOS_ALS_I2C_ADDR), }; +static struct mxt_platform_data atmel_224s_tp_platform_data = { + .x_line = 18, + .y_line = 12, + .x_size = 102*20, + .y_size = 68*20, + .blen = 0x80, /* Gain setting is in upper 4 bits */ + .threshold = 0x32, + .voltage = 0, /* 3.3V */ + .orient = MXT_VERTICAL_FLIP, + .irqflags = IRQF_TRIGGER_FALLING, + .is_tp = true, + .key_map = { KEY_RESERVED, + KEY_RESERVED, + KEY_RESERVED, + BTN_LEFT }, + .config = NULL, + .config_length = 0, +}; + static struct i2c_board_info __initdata atmel_224s_tp_device = { I2C_BOARD_INFO("atmel_mxt_tp", ATMEL_TP_I2C_ADDR), - .platform_data = NULL, + .platform_data = &atmel_224s_tp_platform_data, .flags = I2C_CLIENT_WAKE, }; +static struct mxt_platform_data atmel_1664s_platform_data = { + .x_line = 32, + .y_line = 50, + .x_size = 1700, + .y_size = 2560, + .blen = 0x89, /* Gain setting is in upper 4 bits */ + .threshold = 0x28, + .voltage = 0, /* 3.3V */ + .orient = MXT_ROTATED_90_COUNTER, + .irqflags = IRQF_TRIGGER_FALLING, + .is_tp = false, + .config = NULL, + .config_length = 0, +}; + static struct i2c_board_info __initdata atmel_1664s_device = { I2C_BOARD_INFO("atmel_mxt_ts", ATMEL_TS_I2C_ADDR), - .platform_data = NULL, + .platform_data = &atmel_1664s_platform_data, .flags = I2C_CLIENT_WAKE, }; From db04dc679bcc780ad6907943afe24a30de974a1b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 9 Mar 2013 00:14:45 -0800 Subject: [PATCH 234/261] proc: Use nd_jump_link in proc_ns_follow_link Update proc_ns_follow_link to use nd_jump_link instead of just manually updating nd.path.dentry. This fixes the BUG_ON(nd->inode != parent->d_inode) reported by Dave Jones and reproduced trivially with mkdir /proc/self/ns/uts/a. Sigh it looks like the VFS change to require use of nd_jump_link happend while proc_ns_follow_link was baking and since the common case of proc_ns_follow_link continued to work without problems the need for making this change was overlooked. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. 
Biederman" --- fs/proc/namespaces.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index b7a47196c8c3..66b51c0383da 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -118,7 +118,7 @@ static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd) struct super_block *sb = inode->i_sb; struct proc_inode *ei = PROC_I(inode); struct task_struct *task; - struct dentry *ns_dentry; + struct path ns_path; void *error = ERR_PTR(-EACCES); task = get_proc_task(inode); @@ -128,14 +128,14 @@ static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd) if (!ptrace_may_access(task, PTRACE_MODE_READ)) goto out_put_task; - ns_dentry = proc_ns_get_dentry(sb, task, ei->ns_ops); - if (IS_ERR(ns_dentry)) { - error = ERR_CAST(ns_dentry); + ns_path.dentry = proc_ns_get_dentry(sb, task, ei->ns_ops); + if (IS_ERR(ns_path.dentry)) { + error = ERR_CAST(ns_path.dentry); goto out_put_task; } - dput(nd->path.dentry); - nd->path.dentry = ns_dentry; + ns_path.mnt = mntget(nd->path.mnt); + nd_jump_link(nd, &ns_path); error = NULL; out_put_task: From 8343bce195da8bb4d5a652ee085474a5cc62983f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 9 Mar 2013 10:31:01 -0800 Subject: [PATCH 235/261] Atmel MXT touchscreen: increase reset timeouts There is a more complete atmel patch-series out by Nick Dyer that fixes this and other things, but in the meantime this is the minimal thing to get the touchscreen going on (at least my) Pixel Chromebook. Not that I want my dirty fingers near that beautiful screen, but it seems that a non-initialized touchscreen will also end up being a constant wakeup source, so you have to disable it to go to sleep. And it's easier to just fix the initialization sequence. Signed-off-by: Linus Torvalds --- drivers/input/touchscreen/atmel_mxt_ts.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index 8ed279c56d27..59aa24002c7b 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -176,8 +176,8 @@ /* Define for MXT_GEN_COMMAND_T6 */ #define MXT_BOOT_VALUE 0xa5 #define MXT_BACKUP_VALUE 0x55 -#define MXT_BACKUP_TIME 25 /* msec */ -#define MXT_RESET_TIME 65 /* msec */ +#define MXT_BACKUP_TIME 50 /* msec */ +#define MXT_RESET_TIME 200 /* msec */ #define MXT_FWRESET_TIME 175 /* msec */ From f6161aa153581da4a3867a2d1a7caf4be19b6ec9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 10 Mar 2013 16:54:19 -0700 Subject: [PATCH 236/261] Linux 3.9-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5bd9f7700eb9..a05ea42c5f18 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Unicycling Gorilla # *DOCUMENTATION* From 34ed62461ae4970695974afb9a60ac3df0086830 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 7 Jan 2013 13:37:42 -0800 Subject: [PATCH 237/261] rcu: Remove restrictions on no-CBs CPUs Currently, CPU 0 is constrained to not be a no-CBs CPU, and furthermore at least one no-CBs CPU must remain online at any given time. These restrictions are problematic in some situations, such as cases where all CPUs must run a real-time workload that needs to be insulated from OS jitter and latencies due to RCU callback invocation. 
This commit therefore provides no-CBs CPUs a (very crude and energy-inefficient) way to start and to wait for grace periods independently of the normal RCU callback mechanisms. This approach allows any or all of the CPUs to be designated as no-CBs CPUs, and allows any proper subset of the CPUs (whether no-CBs CPUs or not) to be offlined. This commit also provides a fix for a locking bug spotted by Xie ChanglongX . Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- init/Kconfig | 4 +- kernel/rcutree.c | 14 ++-- kernel/rcutree.h | 12 +-- kernel/rcutree_plugin.h | 158 ++++++++++++---------------------------- 4 files changed, 58 insertions(+), 130 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index 22616cd434bc..c8bd349eb638 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -655,7 +655,7 @@ config RCU_BOOST_DELAY Accept the default if unsure. config RCU_NOCB_CPU - bool "Offload RCU callback processing from boot-selected CPUs" + bool "Offload RCU callback processing from boot-selected CPUs (EXPERIMENTAL" depends on TREE_RCU || TREE_PREEMPT_RCU default n help @@ -673,7 +673,7 @@ config RCU_NOCB_CPU callback, and (2) affinity or cgroups can be used to force the kthreads to run on whatever set of CPUs is desired. - Say Y here if you want reduced OS jitter on selected CPUs. + Say Y here if you want to help to debug reduced OS jitter. Say N here if you are unsure. endmenu # "RCU Subsystem" diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 5b8ad827fd86..6ad0716e65dc 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -310,6 +310,8 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) if (rcu_gp_in_progress(rsp)) return 0; /* No, a grace period is already in progress. */ + if (rcu_nocb_needs_gp(rdp)) + return 1; /* Yes, a no-CBs CPU needs one. */ if (!rdp->nxttail[RCU_NEXT_TAIL]) return 0; /* No, this is a no-CBs (or offline) CPU. */ if (*rdp->nxttail[RCU_NEXT_READY_TAIL]) @@ -1035,10 +1037,11 @@ static void init_callback_list(struct rcu_data *rdp) { int i; + if (init_nocb_callback_list(rdp)) + return; rdp->nxtlist = NULL; for (i = 0; i < RCU_NEXT_SIZE; i++) rdp->nxttail[i] = &rdp->nxtlist; - init_nocb_callback_list(rdp); } /* @@ -2909,7 +2912,6 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); struct rcu_node *rnp = rdp->mynode; struct rcu_state *rsp; - int ret = NOTIFY_OK; trace_rcu_utilization("Start CPU hotplug"); switch (action) { @@ -2923,10 +2925,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, rcu_boost_kthread_setaffinity(rnp, -1); break; case CPU_DOWN_PREPARE: - if (nocb_cpu_expendable(cpu)) - rcu_boost_kthread_setaffinity(rnp, cpu); - else - ret = NOTIFY_BAD; + rcu_boost_kthread_setaffinity(rnp, cpu); break; case CPU_DYING: case CPU_DYING_FROZEN: @@ -2950,7 +2949,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, break; } trace_rcu_utilization("End CPU hotplug"); - return ret; + return NOTIFY_OK; } /* @@ -3170,7 +3169,6 @@ void __init rcu_init(void) rcu_init_one(&rcu_sched_state, &rcu_sched_data); rcu_init_one(&rcu_bh_state, &rcu_bh_data); __rcu_init_preempt(); - rcu_init_nocb(); open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); /* diff --git a/kernel/rcutree.h b/kernel/rcutree.h index c896b5045d9d..7af39f4aaac4 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -326,6 +326,7 @@ struct rcu_data { int nocb_p_count_lazy; /* (approximate). */ wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. 
*/ struct task_struct *nocb_kthread; + bool nocb_needs_gp; #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ int cpu; @@ -375,12 +376,6 @@ struct rcu_state { struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */ void (*call)(struct rcu_head *head, /* call_rcu() flavor. */ void (*func)(struct rcu_head *head)); -#ifdef CONFIG_RCU_NOCB_CPU - void (*call_remote)(struct rcu_head *head, - void (*func)(struct rcu_head *head)); - /* call_rcu() flavor, but for */ - /* placing on remote CPU. */ -#endif /* #ifdef CONFIG_RCU_NOCB_CPU */ /* The following fields are guarded by the root rcu_node's lock. */ @@ -529,16 +524,15 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu); static void print_cpu_stall_info_end(void); static void zero_cpu_stall_ticks(struct rcu_data *rdp); static void increment_cpu_stall_ticks(void); +static int rcu_nocb_needs_gp(struct rcu_data *rdp); static bool is_nocb_cpu(int cpu); static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, bool lazy); static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, struct rcu_data *rdp); -static bool nocb_cpu_expendable(int cpu); static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp); static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp); -static void init_nocb_callback_list(struct rcu_data *rdp); -static void __init rcu_init_nocb(void); +static bool init_nocb_callback_list(struct rcu_data *rdp); #endif /* #ifndef RCU_TREE_NONCORE */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index c1cc7e17ff9d..44f958a88b21 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -86,10 +86,6 @@ static void __init rcu_bootup_announce_oddness(void) printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); #ifdef CONFIG_RCU_NOCB_CPU if (have_rcu_nocb_mask) { - if (cpumask_test_cpu(0, rcu_nocb_mask)) { - cpumask_clear_cpu(0, rcu_nocb_mask); - pr_info("\tCPU 0: illegal no-CBs CPU (cleared).\n"); - } cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask); pr_info("\tExperimental no-CBs CPUs: %s.\n", nocb_buf); if (rcu_nocb_poll) @@ -2165,6 +2161,14 @@ static int __init parse_rcu_nocb_poll(char *arg) } early_param("rcu_nocb_poll", parse_rcu_nocb_poll); +/* + * Does this CPU needs a grace period due to offloaded callbacks? + */ +static int rcu_nocb_needs_gp(struct rcu_data *rdp) +{ + return rdp->nocb_needs_gp; +} + /* Is the specified CPU a no-CPUs CPU? */ static bool is_nocb_cpu(int cpu) { @@ -2265,95 +2269,39 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, } /* - * There must be at least one non-no-CBs CPU in operation at any given - * time, because no-CBs CPUs are not capable of initiating grace periods - * independently. This function therefore complains if the specified - * CPU is the last non-no-CBs CPU, allowing the CPU-hotplug system to - * avoid offlining the last such CPU. (Recursion is a wonderful thing, - * but you have to have a base case!) + * If necessary, kick off a new grace period, and either way wait + * for a subsequent grace period to complete. 
*/ -static bool nocb_cpu_expendable(int cpu) +static void rcu_nocb_wait_gp(struct rcu_data *rdp) { - cpumask_var_t non_nocb_cpus; - int ret; + unsigned long c; + unsigned long flags; + unsigned long j; + struct rcu_node *rnp = rdp->mynode; + + raw_spin_lock_irqsave(&rnp->lock, flags); + c = rnp->completed + 2; + rdp->nocb_needs_gp = true; + raw_spin_unlock_irqrestore(&rnp->lock, flags); /* - * If there are no no-CB CPUs or if this CPU is not a no-CB CPU, - * then offlining this CPU is harmless. Let it happen. + * Wait for the grace period. Do so interruptibly to avoid messing + * up the load average. */ - if (!have_rcu_nocb_mask || is_nocb_cpu(cpu)) - return 1; - - /* If no memory, play it safe and keep the CPU around. */ - if (!alloc_cpumask_var(&non_nocb_cpus, GFP_NOIO)) - return 0; - cpumask_andnot(non_nocb_cpus, cpu_online_mask, rcu_nocb_mask); - cpumask_clear_cpu(cpu, non_nocb_cpus); - ret = !cpumask_empty(non_nocb_cpus); - free_cpumask_var(non_nocb_cpus); - return ret; -} - -/* - * Helper structure for remote registry of RCU callbacks. - * This is needed for when a no-CBs CPU needs to start a grace period. - * If it just invokes call_rcu(), the resulting callback will be queued, - * which can result in deadlock. - */ -struct rcu_head_remote { - struct rcu_head *rhp; - call_rcu_func_t *crf; - void (*func)(struct rcu_head *rhp); -}; - -/* - * Register a callback as specified by the rcu_head_remote struct. - * This function is intended to be invoked via smp_call_function_single(). - */ -static void call_rcu_local(void *arg) -{ - struct rcu_head_remote *rhrp = - container_of(arg, struct rcu_head_remote, rhp); - - rhrp->crf(rhrp->rhp, rhrp->func); -} - -/* - * Set up an rcu_head_remote structure and the invoke call_rcu_local() - * on CPU 0 (which is guaranteed to be a non-no-CBs CPU) via - * smp_call_function_single(). - */ -static void invoke_crf_remote(struct rcu_head *rhp, - void (*func)(struct rcu_head *rhp), - call_rcu_func_t crf) -{ - struct rcu_head_remote rhr; - - rhr.rhp = rhp; - rhr.crf = crf; - rhr.func = func; - smp_call_function_single(0, call_rcu_local, &rhr, 1); -} - -/* - * Helper functions to be passed to wait_rcu_gp(), each of which - * invokes invoke_crf_remote() to register a callback appropriately. - */ -static void __maybe_unused -call_rcu_preempt_remote(struct rcu_head *rhp, - void (*func)(struct rcu_head *rhp)) -{ - invoke_crf_remote(rhp, func, call_rcu); -} -static void call_rcu_bh_remote(struct rcu_head *rhp, - void (*func)(struct rcu_head *rhp)) -{ - invoke_crf_remote(rhp, func, call_rcu_bh); -} -static void call_rcu_sched_remote(struct rcu_head *rhp, - void (*func)(struct rcu_head *rhp)) -{ - invoke_crf_remote(rhp, func, call_rcu_sched); + for (;;) { + j = jiffies; + schedule_timeout_interruptible(2); + raw_spin_lock_irqsave(&rnp->lock, flags); + if (ULONG_CMP_GE(rnp->completed, c)) { + rdp->nocb_needs_gp = false; + raw_spin_unlock_irqrestore(&rnp->lock, flags); + break; + } + if (j == jiffies) + flush_signals(current); + raw_spin_unlock_irqrestore(&rnp->lock, flags); + } + smp_mb(); /* Ensure that CB invocation happens after GP end. */ } /* @@ -2390,7 +2338,7 @@ static int rcu_nocb_kthread(void *arg) cl = atomic_long_xchg(&rdp->nocb_q_count_lazy, 0); ACCESS_ONCE(rdp->nocb_p_count) += c; ACCESS_ONCE(rdp->nocb_p_count_lazy) += cl; - wait_rcu_gp(rdp->rsp->call_remote); + rcu_nocb_wait_gp(rdp); /* Each pass through the following loop invokes a callback. 
*/ trace_rcu_batch_start(rdp->rsp->name, cl, c, -1); @@ -2443,26 +2391,22 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) } /* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */ -static void init_nocb_callback_list(struct rcu_data *rdp) +static bool init_nocb_callback_list(struct rcu_data *rdp) { if (rcu_nocb_mask == NULL || !cpumask_test_cpu(rdp->cpu, rcu_nocb_mask)) - return; + return false; rdp->nxttail[RCU_NEXT_TAIL] = NULL; -} - -/* Initialize the ->call_remote fields in the rcu_state structures. */ -static void __init rcu_init_nocb(void) -{ -#ifdef CONFIG_PREEMPT_RCU - rcu_preempt_state.call_remote = call_rcu_preempt_remote; -#endif /* #ifdef CONFIG_PREEMPT_RCU */ - rcu_bh_state.call_remote = call_rcu_bh_remote; - rcu_sched_state.call_remote = call_rcu_sched_remote; + return true; } #else /* #ifdef CONFIG_RCU_NOCB_CPU */ +static int rcu_nocb_needs_gp(struct rcu_data *rdp) +{ + return 0; +} + static bool is_nocb_cpu(int cpu) { return false; @@ -2480,11 +2424,6 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, return 0; } -static bool nocb_cpu_expendable(int cpu) -{ - return 1; -} - static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) { } @@ -2493,12 +2432,9 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) { } -static void init_nocb_callback_list(struct rcu_data *rdp) -{ -} - -static void __init rcu_init_nocb(void) +static bool init_nocb_callback_list(struct rcu_data *rdp) { + return false; } #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ From b5b393601dbce0bce3f0717f29e6c8d1cf0295da Mon Sep 17 00:00:00 2001 From: Jiang Fang Date: Sat, 2 Feb 2013 14:13:42 -0800 Subject: [PATCH 238/261] rcu: Fix spacing problem Signed-off-by: Jiang Fang Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 5b8ad827fd86..157539a975df 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -3171,7 +3171,7 @@ void __init rcu_init(void) rcu_init_one(&rcu_bh_state, &rcu_bh_data); __rcu_init_preempt(); rcu_init_nocb(); - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); + open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); /* * We don't need protection against CPU-hotplug here because From b0f740360efec6e6471547c0548f250bc045a233 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Feb 2013 12:14:24 -0800 Subject: [PATCH 239/261] rcu: Avoid invoking RCU core on offline CPUs Offline CPUs transition through the scheduler to the idle loop one last time before being shut down. This can result in RCU raising softirq on this CPU, which is at best useless given that the CPU's callbacks will be offloaded at CPU_DEAD time. This commit therefore avoids raising softirq on offline CPUs. Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 157539a975df..b2fc234ba1b9 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -2169,7 +2169,8 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) static void invoke_rcu_core(void) { - raise_softirq(RCU_SOFTIRQ); + if (cpu_online(smp_processor_id())) + raise_softirq(RCU_SOFTIRQ); } /* From 0bdf5984ad647ba5d1c09ed66a75e5bf609456ba Mon Sep 17 00:00:00 2001 From: "Srivatsa S. 
Bhat" Date: Fri, 8 Feb 2013 16:11:29 -0800 Subject: [PATCH 240/261] rcu: Remove comment referring to __stop_machine() Although it used to be that CPU_DYING notifiers executed on the outgoing CPU with interrupts disabled and with all other CPUs spinning, this is no longer the case. This commit therefore removes this obsolete comment. Signed-off-by: Srivatsa S. Bhat Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index b2fc234ba1b9..71df6f9f5ce6 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -2931,11 +2931,6 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, break; case CPU_DYING: case CPU_DYING_FROZEN: - /* - * The whole machine is "stopped" except this CPU, so we can - * touch any data without introducing corruption. We send the - * dying CPU's callbacks to an arbitrarily chosen online CPU. - */ for_each_rcu_flavor(rsp) rcu_cleanup_dying_cpu(rsp); rcu_cleanup_after_idle(cpu); From 6f0a6ad2b55053879eee89f812f3527950c380d6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 10 Feb 2013 14:22:14 -0800 Subject: [PATCH 241/261] rcu: Delete unused rcu_node "wakemask" field Signed-off-by: Paul E. McKenney --- kernel/rcutree.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/kernel/rcutree.h b/kernel/rcutree.h index c896b5045d9d..8e756099a1a8 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -134,9 +134,6 @@ struct rcu_node { /* elements that need to drain to allow the */ /* current expedited grace period to */ /* complete (only for TREE_PREEMPT_RCU). */ - atomic_t wakemask; /* CPUs whose kthread needs to be awakened. */ - /* Since this has meaning only for leaf */ - /* rcu_node structures, 32 bits suffices. */ unsigned long qsmaskinit; /* Per-GP initial value for qsmask & expmask. */ unsigned long grpmask; /* Mask to apply to parent qsmask. */ From 49d0de082c31de34cc896c14eec5f1c2ade0415a Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 14 Feb 2013 16:42:34 -0800 Subject: [PATCH 242/261] rcu: Fix hlist_bl_set_first_rcu() annotation Abhi noticed that we were getting a complaint from the RCU subsystem about access of an RCU protected list under the write side bit lock. This commit adds additional annotation to check both the RCU read lock and the write side bit lock before printing a message. Reported by: Abhijith Das Signed-off-by: Steven Whitehouse Tested-by: Abhijith Das Signed-off-by: Paul E. McKenney --- include/linux/list_bl.h | 5 +++++ include/linux/rculist_bl.h | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h index 31f9d75adc5b..2eb88556c5c5 100644 --- a/include/linux/list_bl.h +++ b/include/linux/list_bl.h @@ -125,6 +125,11 @@ static inline void hlist_bl_unlock(struct hlist_bl_head *b) __bit_spin_unlock(0, (unsigned long *)b); } +static inline bool hlist_bl_is_locked(struct hlist_bl_head *b) +{ + return bit_spin_is_locked(0, (unsigned long *)b); +} + /** * hlist_bl_for_each_entry - iterate over list of given type * @tpos: the type * to use as a loop cursor. 
diff --git a/include/linux/rculist_bl.h b/include/linux/rculist_bl.h index cf1244fbf3b6..4f216c59e7db 100644 --- a/include/linux/rculist_bl.h +++ b/include/linux/rculist_bl.h @@ -20,7 +20,7 @@ static inline void hlist_bl_set_first_rcu(struct hlist_bl_head *h, static inline struct hlist_bl_node *hlist_bl_first_rcu(struct hlist_bl_head *h) { return (struct hlist_bl_node *) - ((unsigned long)rcu_dereference(h->first) & ~LIST_BL_LOCKMASK); + ((unsigned long)rcu_dereference_check(h->first, hlist_bl_is_locked(h)) & ~LIST_BL_LOCKMASK); } /** From 4357fb570b3709c145384065d04b698a30dc722e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 12 Feb 2013 07:56:27 -0800 Subject: [PATCH 243/261] rcu: Make bugginess of code sample more evident One of the code samples in whatisRCU.txt shows a bug, but someone scanning the document quickly might mistake it for a valid use of RCU. Add some screaming comments to help keep speed-readers on track. Reported-by: Nathan Zimmer Signed-off-by: Paul E. McKenney Acked-by: Rafael J. Wysocki --- Documentation/RCU/whatisRCU.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 0cc7820967f4..10df0b82f459 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -265,9 +265,9 @@ rcu_dereference() rcu_read_lock(); p = rcu_dereference(head.next); rcu_read_unlock(); - x = p->address; + x = p->address; /* BUG!!! */ rcu_read_lock(); - y = p->data; + y = p->data; /* BUG!!! */ rcu_read_unlock(); Holding a reference from one RCU read-side critical section From 3f944adb9d1ca912902783e7aede2a5b5c19a605 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Mar 2013 17:55:49 -0800 Subject: [PATCH 244/261] rcu: Documentation update This commit applies a few updates based on a quick review of the RCU documentations. Signed-off-by: Paul E. McKenney --- Documentation/RCU/checklist.txt | 26 ++++++++++++++++---------- Documentation/RCU/lockdep.txt | 5 +++++ Documentation/RCU/rcubarrier.txt | 15 ++++++++++++++- 3 files changed, 35 insertions(+), 11 deletions(-) diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index 31ef8fe07f82..79e789b8b8ea 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt @@ -217,9 +217,14 @@ over a rather long period of time, but improvements are always welcome! whether the increased speed is worth it. 8. Although synchronize_rcu() is slower than is call_rcu(), it - usually results in simpler code. So, unless update performance - is critically important or the updaters cannot block, - synchronize_rcu() should be used in preference to call_rcu(). + usually results in simpler code. So, unless update performance is + critically important, the updaters cannot block, or the latency of + synchronize_rcu() is visible from userspace, synchronize_rcu() + should be used in preference to call_rcu(). Furthermore, + kfree_rcu() usually results in even simpler code than does + synchronize_rcu() without synchronize_rcu()'s multi-millisecond + latency. So please take advantage of kfree_rcu()'s "fire and + forget" memory-freeing capabilities where it applies. An especially important property of the synchronize_rcu() primitive is that it automatically self-limits: if grace periods @@ -268,7 +273,8 @@ over a rather long period of time, but improvements are always welcome! e. Periodically invoke synchronize_rcu(), permitting a limited number of updates per grace period. 
- The same cautions apply to call_rcu_bh() and call_rcu_sched(). + The same cautions apply to call_rcu_bh(), call_rcu_sched(), + call_srcu(), and kfree_rcu(). 9. All RCU list-traversal primitives, which include rcu_dereference(), list_for_each_entry_rcu(), and @@ -296,9 +302,9 @@ over a rather long period of time, but improvements are always welcome! all currently executing rcu_read_lock()-protected RCU read-side critical sections complete. It does -not- necessarily guarantee that all currently running interrupts, NMIs, preempt_disable() - code, or idle loops will complete. Therefore, if you do not have - rcu_read_lock()-protected read-side critical sections, do -not- - use synchronize_rcu(). + code, or idle loops will complete. Therefore, if your + read-side critical sections are protected by something other + than rcu_read_lock(), do -not- use synchronize_rcu(). Similarly, disabling preemption is not an acceptable substitute for rcu_read_lock(). Code that attempts to use preemption @@ -401,9 +407,9 @@ over a rather long period of time, but improvements are always welcome! read-side critical sections. It is the responsibility of the RCU update-side primitives to deal with this. -17. Use CONFIG_PROVE_RCU, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and - the __rcu sparse checks to validate your RCU code. These - can help find problems as follows: +17. Use CONFIG_PROVE_RCU, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and the + __rcu sparse checks (enabled by CONFIG_SPARSE_RCU_POINTER) to + validate your RCU code. These can help find problems as follows: CONFIG_PROVE_RCU: check that accesses to RCU-protected data structures are carried out under the proper RCU diff --git a/Documentation/RCU/lockdep.txt b/Documentation/RCU/lockdep.txt index a102d4b3724b..cd83d2348fef 100644 --- a/Documentation/RCU/lockdep.txt +++ b/Documentation/RCU/lockdep.txt @@ -64,6 +64,11 @@ checking of rcu_dereference() primitives: but retain the compiler constraints that prevent duplicating or coalescsing. This is useful when when testing the value of the pointer itself, for example, against NULL. + rcu_access_index(idx): + Return the value of the index and omit all barriers, but + retain the compiler constraints that prevent duplicating + or coalescsing. This is useful when when testing the + value of the index itself, for example, against -1. The rcu_dereference_check() check expression can be any boolean expression, but would normally include a lockdep expression. However, diff --git a/Documentation/RCU/rcubarrier.txt b/Documentation/RCU/rcubarrier.txt index 38428c125135..2e319d1b9ef2 100644 --- a/Documentation/RCU/rcubarrier.txt +++ b/Documentation/RCU/rcubarrier.txt @@ -79,7 +79,20 @@ complete. Pseudo-code using rcu_barrier() is as follows: 2. Execute rcu_barrier(). 3. Allow the module to be unloaded. -The rcutorture module makes use of rcu_barrier in its exit function +There are also rcu_barrier_bh(), rcu_barrier_sched(), and srcu_barrier() +functions for the other flavors of RCU, and you of course must match +the flavor of rcu_barrier() with that of call_rcu(). If your module +uses multiple flavors of call_rcu(), then it must also use multiple +flavors of rcu_barrier() when unloading that module. 
For example, if +it uses call_rcu_bh(), call_srcu() on srcu_struct_1, and call_srcu() on +srcu_struct_2(), then the following three lines of code will be required +when unloading: + + 1 rcu_barrier_bh(); + 2 srcu_barrier(&srcu_struct_1); + 3 srcu_barrier(&srcu_struct_2); + +The rcutorture module makes use of rcu_barrier() in its exit function as follows: 1 static void From 6231069bdab575fce862ca786f1c0ba5e4e9ba3b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 6 Mar 2013 13:37:09 -0800 Subject: [PATCH 245/261] rcu: Add softirq-stall indications to stall-warning messages If RCU's softirq handler is prevented from executing, an RCU CPU stall warning can result. Ways to prevent RCU's softirq handler from executing include: (1) CPU spinning with interrupts disabled, (2) infinite loop in some softirq handler, and (3) in -rt kernels, an infinite loop in a set of real-time threads running at priorities higher than that of RCU's softirq handler. Because this situation can be difficult to track down, this commit causes the count of RCU softirq handler invocations to be printed with RCU CPU stall warnings. This information does require some interpretation, as now documented in Documentation/RCU/stallwarn.txt. Reported-by: Thomas Gleixner Signed-off-by: Paul E. McKenney Tested-by: Paul Gortmaker --- Documentation/RCU/stallwarn.txt | 33 ++++++++++++++++++++++++--------- kernel/rcutree.h | 5 +++++ kernel/rcutree_plugin.h | 4 +++- 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index 1927151b386b..e38b8df3d727 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -92,14 +92,14 @@ If the CONFIG_RCU_CPU_STALL_INFO kernel configuration parameter is set, more information is printed with the stall-warning message, for example: INFO: rcu_preempt detected stall on CPU - 0: (63959 ticks this GP) idle=241/3fffffffffffffff/0 + 0: (63959 ticks this GP) idle=241/3fffffffffffffff/0 softirq=82/543 (t=65000 jiffies) In kernels with CONFIG_RCU_FAST_NO_HZ, even more information is printed: INFO: rcu_preempt detected stall on CPU - 0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 drain=0 . timer not pending + 0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 softirq=82/543 last_accelerate: a345/d342 nonlazy_posted: 25 .D (t=65000 jiffies) The "(64628 ticks this GP)" indicates that this CPU has taken more @@ -116,13 +116,28 @@ number between the two "/"s is the value of the nesting, which will be a small positive number if in the idle loop and a very large positive number (as shown above) otherwise. -For CONFIG_RCU_FAST_NO_HZ kernels, the "drain=0" indicates that the CPU is -not in the process of trying to force itself into dyntick-idle state, the -"." indicates that the CPU has not given up forcing RCU into dyntick-idle -mode (it would be "H" otherwise), and the "timer not pending" indicates -that the CPU has not recently forced RCU into dyntick-idle mode (it -would otherwise indicate the number of microseconds remaining in this -forced state). +The "softirq=" portion of the message tracks the number of RCU softirq +handlers that the stalled CPU has executed. The number before the "/" +is the number that had executed since boot at the time that this CPU +last noted the beginning of a grace period, which might be the current +(stalled) grace period, or it might be some earlier grace period (for +example, if the CPU might have been in dyntick-idle mode for an extended +time period. 
The number after the "/" is the number that have executed +since boot until the current time. If this latter number stays constant +across repeated stall-warning messages, it is possible that RCU's softirq +handlers are no longer able to execute on this CPU. This can happen if +the stalled CPU is spinning with interrupts are disabled, or, in -rt +kernels, if a high-priority process is starving RCU's softirq handler. + +For CONFIG_RCU_FAST_NO_HZ kernels, the "last_accelerate:" prints the +low-order 16 bits (in hex) of the jiffies counter when this CPU last +invoked rcu_try_advance_all_cbs() from rcu_needs_cpu() or last invoked +rcu_accelerate_cbs() from rcu_prepare_for_idle(). The "nonlazy_posted:" +prints the number of non-lazy callbacks posted since the last call to +rcu_needs_cpu(). Finally, an "L" indicates that there are currently +no non-lazy callbacks ("." is printed otherwise, as shown above) and +"D" indicates that dyntick-idle processing is enabled ("." is printed +otherwise, for example, if disabled via the "nohz=" kernel boot parameter). Multiple Warnings From One Stall diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 8e756099a1a8..26c9bb3166a3 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -325,6 +325,11 @@ struct rcu_data { struct task_struct *nocb_kthread; #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ + /* 8) RCU CPU stall data. */ +#ifdef CONFIG_RCU_CPU_STALL_INFO + unsigned int softirq_snap; /* Snapshot of softirq activity. */ +#endif /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ + int cpu; struct rcu_state *rsp; }; diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index c1cc7e17ff9d..7fcd3bbf67da 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2070,10 +2070,11 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) ticks_value = rsp->gpnum - rdp->gpnum; } print_cpu_stall_fast_no_hz(fast_no_hz, cpu); - printk(KERN_ERR "\t%d: (%lu %s) idle=%03x/%llx/%d %s\n", + printk(KERN_ERR "\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u %s\n", cpu, ticks_value, ticks_title, atomic_read(&rdtp->dynticks) & 0xfff, rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting, + rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu), fast_no_hz); } @@ -2087,6 +2088,7 @@ static void print_cpu_stall_info_end(void) static void zero_cpu_stall_ticks(struct rcu_data *rdp) { rdp->ticks_this_gp = 0; + rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id()); } /* Increment ->ticks_this_gp for all flavors of RCU. */ From 81e59494a56cb14f559886c345c4a93fb576bbab Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 10 Mar 2013 15:44:52 -0700 Subject: [PATCH 246/261] rcu: Tone down debugging during boot-up and shutdown. In some situations, randomly delaying RCU grace-period initialization can cause more trouble than help. This commit therefore restricts this type of RCU self-torture to runtime, giving it a rest during boot and shutdown. Reported-by: Sasha Levin Signed-off-by: Paul E. 
McKenney --- kernel/rcutree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 71df6f9f5ce6..0e522504ae37 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1319,7 +1319,8 @@ static int rcu_gp_init(struct rcu_state *rsp) rnp->grphi, rnp->qsmask); raw_spin_unlock_irq(&rnp->lock); #ifdef CONFIG_PROVE_RCU_DELAY - if ((random32() % (rcu_num_nodes * 8)) == 0) + if ((random32() % (rcu_num_nodes * 8)) == 0 && + system_state == SYSTEM_RUNNING) schedule_timeout_uninterruptible(2); #endif /* #ifdef CONFIG_PROVE_RCU_DELAY */ cond_resched(); From 911af505ef407c2511106c224dd640f882f0f590 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 11 Feb 2013 10:23:27 -0800 Subject: [PATCH 247/261] rcu: Provide compile-time control for no-CBs CPUs Currently, the only way to specify no-CBs CPUs is via the rcu_nocbs kernel command-line parameter. This is inconvenient in some cases, particularly for randconfig testing, so this commit adds a new set of kernel configuration parameters. CONFIG_RCU_NOCB_CPU_NONE (the default) retains the old behavior, CONFIG_RCU_NOCB_CPU_ZERO offloads callback processing from CPU 0 (along with any other CPUs specified by the rcu_nocbs boot-time parameter), and CONFIG_RCU_NOCB_CPU_ALL offloads callback processing from all CPUs. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- init/Kconfig | 39 +++++++++++++++++++++++++++++++++++++++ kernel/rcutree_plugin.h | 14 ++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/init/Kconfig b/init/Kconfig index c8bd349eb638..307499704580 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -676,6 +676,45 @@ config RCU_NOCB_CPU Say Y here if you want to help to debug reduced OS jitter. Say N here if you are unsure. +choice + prompt "Build-forced no-CBs CPUs" + default RCU_NOCB_CPU_NONE + help + This option allows no-CBs CPUs to be specified at build time. + Additional no-CBs CPUs may be specified by the rcu_nocbs= + boot parameter. + +config RCU_NOCB_CPU_NONE + bool "No build_forced no-CBs CPUs" + depends on RCU_NOCB_CPU + help + This option does not force any of the CPUs to be no-CBs CPUs. + Only CPUs designated by the rcu_nocbs= boot parameter will be + no-CBs CPUs. + +config RCU_NOCB_CPU_ZERO + bool "CPU 0 is a build_forced no-CBs CPU" + depends on RCU_NOCB_CPU + help + This option forces CPU 0 to be a no-CBs CPU. Additional CPUs + may be designated as no-CBs CPUs using the rcu_nocbs= boot + parameter will be no-CBs CPUs. + + Select this if CPU 0 needs to be a no-CBs CPU for real-time + or energy-efficiency reasons. + +config RCU_NOCB_CPU_ALL + bool "All CPUs are build_forced no-CBs CPUs" + depends on RCU_NOCB_CPU + help + This option forces all CPUs to be no-CBs CPUs. The rcu_nocbs= + boot parameter will be ignored. + + Select this if all CPUs need to be no-CBs CPUs for real-time + or energy-efficiency reasons. 
+ +endchoice + endmenu # "RCU Subsystem" config IKCONFIG diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 44f958a88b21..3e33aefce0ea 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -85,6 +85,20 @@ static void __init rcu_bootup_announce_oddness(void) if (nr_cpu_ids != NR_CPUS) printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); #ifdef CONFIG_RCU_NOCB_CPU +#ifndef CONFIG_RCU_NOCB_CPU_NONE + if (!have_rcu_nocb_mask) { + alloc_bootmem_cpumask_var(&rcu_nocb_mask); + have_rcu_nocb_mask = true; + } +#ifdef CONFIG_RCU_NOCB_CPU_ZERO + pr_info("\tExperimental no-CBs CPU 0\n"); + cpumask_set_cpu(0, rcu_nocb_mask); +#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */ +#ifdef CONFIG_RCU_NOCB_CPU_ALL + pr_info("\tExperimental no-CBs for all CPUs\n"); + cpumask_setall(rcu_nocb_mask); +#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */ +#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */ if (have_rcu_nocb_mask) { cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask); pr_info("\tExperimental no-CBs CPUs: %s.\n", nocb_buf); From dae6e64d2bcfd4b06304ab864c7e3a4f6b5fedf4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 10 Feb 2013 20:48:58 -0800 Subject: [PATCH 248/261] rcu: Introduce proper blocking to no-CBs kthreads GP waits Currently, the no-CBs kthreads do repeated timed waits for grace periods to elapse. This is crude and energy inefficient, so this commit allows no-CBs kthreads to specify exactly which grace period they are waiting for and also allows them to block for the entire duration until the desired grace period completes. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 6 +- kernel/rcutree.h | 12 +++- kernel/rcutree_plugin.h | 129 ++++++++++++++++++++++++++++++++++------ 3 files changed, 127 insertions(+), 20 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 6ad0716e65dc..433f426c848f 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -310,7 +310,7 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) if (rcu_gp_in_progress(rsp)) return 0; /* No, a grace period is already in progress. */ - if (rcu_nocb_needs_gp(rdp)) + if (rcu_nocb_needs_gp(rsp)) return 1; /* Yes, a no-CBs CPU needs one. */ if (!rdp->nxttail[RCU_NEXT_TAIL]) return 0; /* No, this is a no-CBs (or offline) CPU. */ @@ -1364,6 +1364,7 @@ int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) static void rcu_gp_cleanup(struct rcu_state *rsp) { unsigned long gp_duration; + int nocb = 0; struct rcu_data *rdp; struct rcu_node *rnp = rcu_get_root(rsp); @@ -1394,11 +1395,13 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) rcu_for_each_node_breadth_first(rsp, rnp) { raw_spin_lock_irq(&rnp->lock); rnp->completed = rsp->gpnum; + nocb += rcu_nocb_gp_cleanup(rsp, rnp); raw_spin_unlock_irq(&rnp->lock); cond_resched(); } rnp = rcu_get_root(rsp); raw_spin_lock_irq(&rnp->lock); + rcu_nocb_gp_set(rnp, nocb); rsp->completed = rsp->gpnum; /* Declare grace period done. */ trace_rcu_grace_period(rsp->name, rsp->completed, "end"); @@ -3084,6 +3087,7 @@ static void __init rcu_init_one(struct rcu_state *rsp, } rnp->level = i; INIT_LIST_HEAD(&rnp->blkd_tasks); + rcu_init_one_nocb(rnp); } } diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 7af39f4aaac4..e51373c0b748 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -196,6 +196,12 @@ struct rcu_node { /* Refused to boost: not sure why, though. */ /* This can happen due to race conditions. 
*/ #endif /* #ifdef CONFIG_RCU_BOOST */ +#ifdef CONFIG_RCU_NOCB_CPU + wait_queue_head_t nocb_gp_wq[2]; + /* Place for rcu_nocb_kthread() to wait GP. */ + int n_nocb_gp_requests[2]; + /* Counts of upcoming no-CB GP requests. */ +#endif /* #ifdef CONFIG_RCU_NOCB_CPU */ raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp; } ____cacheline_internodealigned_in_smp; @@ -326,7 +332,6 @@ struct rcu_data { int nocb_p_count_lazy; /* (approximate). */ wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */ struct task_struct *nocb_kthread; - bool nocb_needs_gp; #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ int cpu; @@ -524,7 +529,10 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu); static void print_cpu_stall_info_end(void); static void zero_cpu_stall_ticks(struct rcu_data *rdp); static void increment_cpu_stall_ticks(void); -static int rcu_nocb_needs_gp(struct rcu_data *rdp); +static int rcu_nocb_needs_gp(struct rcu_state *rsp); +static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq); +static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp); +static void rcu_init_one_nocb(struct rcu_node *rnp); static bool is_nocb_cpu(int cpu); static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, bool lazy); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 3e33aefce0ea..90a191452550 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2176,11 +2176,51 @@ static int __init parse_rcu_nocb_poll(char *arg) early_param("rcu_nocb_poll", parse_rcu_nocb_poll); /* - * Does this CPU needs a grace period due to offloaded callbacks? + * Do any no-CBs CPUs need another grace period? + * + * Interrupts must be disabled. If the caller does not hold the root + * rnp_node structure's ->lock, the results are advisory only. */ -static int rcu_nocb_needs_gp(struct rcu_data *rdp) +static int rcu_nocb_needs_gp(struct rcu_state *rsp) { - return rdp->nocb_needs_gp; + struct rcu_node *rnp = rcu_get_root(rsp); + + return rnp->n_nocb_gp_requests[(ACCESS_ONCE(rnp->completed) + 1) & 0x1]; +} + +/* + * Clean up this rcu_node structure's no-CBs state at the end of + * a grace period, and also return whether any no-CBs CPU associated + * with this rcu_node structure needs another grace period. + */ +static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) +{ + int c = rnp->completed; + int needmore; + + wake_up_all(&rnp->nocb_gp_wq[c & 0x1]); + rnp->n_nocb_gp_requests[c & 0x1] = 0; + needmore = rnp->n_nocb_gp_requests[(c + 1) & 0x1]; + return needmore; +} + +/* + * Set the root rcu_node structure's ->n_nocb_gp_requests field + * based on the sum of those of all rcu_node structures. This does + * double-count the root rcu_node structure's requests, but this + * is necessary to handle the possibility of a rcu_nocb_kthread() + * having awakened during the time that the rcu_node structures + * were being updated for the end of the previous grace period. + */ +static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq) +{ + rnp->n_nocb_gp_requests[(rnp->completed + 1) & 0x1] += nrq; +} + +static void rcu_init_one_nocb(struct rcu_node *rnp) +{ + init_waitqueue_head(&rnp->nocb_gp_wq[0]); + init_waitqueue_head(&rnp->nocb_gp_wq[1]); } /* Is the specified CPU a no-CPUs CPU? 
*/ @@ -2289,31 +2329,73 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, static void rcu_nocb_wait_gp(struct rcu_data *rdp) { unsigned long c; + bool d; unsigned long flags; - unsigned long j; + unsigned long flags1; struct rcu_node *rnp = rdp->mynode; + struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); raw_spin_lock_irqsave(&rnp->lock, flags); c = rnp->completed + 2; - rdp->nocb_needs_gp = true; - raw_spin_unlock_irqrestore(&rnp->lock, flags); + + /* Count our request for a grace period. */ + rnp->n_nocb_gp_requests[c & 0x1]++; + + if (rnp->gpnum != rnp->completed) { + + /* + * This rcu_node structure believes that a grace period + * is in progress, so we are done. When this grace + * period ends, our request will be acted upon. + */ + raw_spin_unlock_irqrestore(&rnp->lock, flags); + + } else { + + /* + * Might not be a grace period, check root rcu_node + * structure to see if we must start one. + */ + if (rnp != rnp_root) + raw_spin_lock(&rnp_root->lock); /* irqs disabled. */ + if (rnp_root->gpnum != rnp_root->completed) { + raw_spin_unlock(&rnp_root->lock); /* irqs disabled. */ + } else { + + /* + * No grace period, so we need to start one. + * The good news is that we can wait for exactly + * one grace period instead of part of the current + * grace period and all of the next grace period. + * Adjust counters accordingly and start the + * needed grace period. + */ + rnp->n_nocb_gp_requests[c & 0x1]--; + c = rnp_root->completed + 1; + rnp->n_nocb_gp_requests[c & 0x1]++; + rnp_root->n_nocb_gp_requests[c & 0x1]++; + local_save_flags(flags1); + rcu_start_gp(rdp->rsp, flags1); /* Rlses ->lock. */ + } + + /* Clean up locking and irq state. */ + if (rnp != rnp_root) + raw_spin_unlock_irqrestore(&rnp->lock, flags); + else + local_irq_restore(flags); + } /* * Wait for the grace period. Do so interruptibly to avoid messing * up the load average. */ for (;;) { - j = jiffies; - schedule_timeout_interruptible(2); - raw_spin_lock_irqsave(&rnp->lock, flags); - if (ULONG_CMP_GE(rnp->completed, c)) { - rdp->nocb_needs_gp = false; - raw_spin_unlock_irqrestore(&rnp->lock, flags); + wait_event_interruptible( + rnp->nocb_gp_wq[c & 0x1], + (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c))); + if (likely(d)) break; - } - if (j == jiffies) - flush_signals(current); - raw_spin_unlock_irqrestore(&rnp->lock, flags); + flush_signals(current); } smp_mb(); /* Ensure that CB invocation happens after GP end. */ } @@ -2416,11 +2498,24 @@ static bool init_nocb_callback_list(struct rcu_data *rdp) #else /* #ifdef CONFIG_RCU_NOCB_CPU */ -static int rcu_nocb_needs_gp(struct rcu_data *rdp) +static int rcu_nocb_needs_gp(struct rcu_state *rsp) { return 0; } +static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) +{ + return 0; +} + +static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq) +{ +} + +static void rcu_init_one_nocb(struct rcu_node *rnp) +{ +} + static bool is_nocb_cpu(int cpu) { return false; From 21e7a6087480451804124cee27c0a7d0a7de1564 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 9 Feb 2013 17:42:16 -0800 Subject: [PATCH 249/261] rcu: Add event tracing for no-CBs CPUs' callback registration Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. 
McKenney --- kernel/rcutree_plugin.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 90a191452550..7225a5a14cef 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2285,6 +2285,13 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, if (!is_nocb_cpu(rdp->cpu)) return 0; __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy); + if (__is_kfree_rcu_offset((unsigned long)rhp->func)) + trace_rcu_kfree_callback(rdp->rsp->name, rhp, + (unsigned long)rhp->func, + rdp->qlen_lazy, rdp->qlen); + else + trace_rcu_callback(rdp->rsp->name, rhp, + rdp->qlen_lazy, rdp->qlen); return 1; } From 09c7b890622d72b5e004cc249bbe610e8b928ddf Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 8 Feb 2013 15:55:02 -0800 Subject: [PATCH 250/261] rcu: Add event tracing for no-CBs CPUs' grace periods Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 55 ++++++++++++++++++++++++++++++++++++++ kernel/rcutree_plugin.h | 30 +++++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 1918e832da4f..cdfed6d386eb 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -71,6 +71,58 @@ TRACE_EVENT(rcu_grace_period, __entry->rcuname, __entry->gpnum, __entry->gpevent) ); +/* + * Tracepoint for no-callbacks grace-period events. The caller should + * pull the data from the rcu_node structure, other than rcuname, which + * comes from the rcu_state structure, and event, which is one of the + * following: + * + * "Startleaf": Request a nocb grace period based on leaf-node data. + * "Startedleaf": Leaf-node start proved sufficient. + * "Startedleafroot": Leaf-node start proved sufficient after checking root. + * "Startedroot": Requested a nocb grace period based on root-node data. + * "StartWait": Start waiting for the requested grace period. + * "ResumeWait": Resume waiting after signal. + * "EndWait": Complete wait. + * "Cleanup": Clean up rcu_node structure after previous GP. + * "CleanupMore": Clean up, and another no-CB GP is needed. + */ +TRACE_EVENT(rcu_nocb_grace_period, + + TP_PROTO(char *rcuname, unsigned long gpnum, unsigned long completed, + unsigned long c, u8 level, int grplo, int grphi, + char *gpevent), + + TP_ARGS(rcuname, gpnum, completed, c, level, grplo, grphi, gpevent), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(unsigned long, completed) + __field(unsigned long, c) + __field(u8, level) + __field(int, grplo) + __field(int, grphi) + __field(char *, gpevent) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->completed = completed; + __entry->c = c; + __entry->level = level; + __entry->grplo = grplo; + __entry->grphi = grphi; + __entry->gpevent = gpevent; + ), + + TP_printk("%s %lu %lu %lu %u %d %d %s", + __entry->rcuname, __entry->gpnum, __entry->completed, + __entry->c, __entry->level, __entry->grplo, __entry->grphi, + __entry->gpevent) +); + /* * Tracepoint for grace-period-initialization events. 
These are * distinguished by the type of RCU, the new grace-period number, the @@ -601,6 +653,9 @@ TRACE_EVENT(rcu_barrier, #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) #define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \ qsmask) do { } while (0) +#define trace_rcu_nocb_grace_period(rcuname, gpnum, completed, c, \ + level, grplo, grphi, event) \ + do { } while (0) #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0) #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 7225a5a14cef..e32236e83dda 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2201,6 +2201,9 @@ static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) wake_up_all(&rnp->nocb_gp_wq[c & 0x1]); rnp->n_nocb_gp_requests[c & 0x1] = 0; needmore = rnp->n_nocb_gp_requests[(c + 1) & 0x1]; + trace_rcu_nocb_grace_period(rsp->name, rnp->gpnum, rnp->completed, + c, rnp->level, rnp->grplo, rnp->grphi, + needmore ? "CleanupMore" : "Cleanup"); return needmore; } @@ -2347,6 +2350,9 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) /* Count our request for a grace period. */ rnp->n_nocb_gp_requests[c & 0x1]++; + trace_rcu_nocb_grace_period(rdp->rsp->name, rnp->gpnum, rnp->completed, + c, rnp->level, rnp->grplo, rnp->grphi, + "Startleaf"); if (rnp->gpnum != rnp->completed) { @@ -2355,6 +2361,10 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) * is in progress, so we are done. When this grace * period ends, our request will be acted upon. */ + trace_rcu_nocb_grace_period(rdp->rsp->name, + rnp->gpnum, rnp->completed, c, + rnp->level, rnp->grplo, rnp->grphi, + "Startedleaf"); raw_spin_unlock_irqrestore(&rnp->lock, flags); } else { @@ -2366,6 +2376,11 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) if (rnp != rnp_root) raw_spin_lock(&rnp_root->lock); /* irqs disabled. */ if (rnp_root->gpnum != rnp_root->completed) { + trace_rcu_nocb_grace_period(rdp->rsp->name, + rnp->gpnum, rnp->completed, + c, rnp->level, + rnp->grplo, rnp->grphi, + "Startedleafroot"); raw_spin_unlock(&rnp_root->lock); /* irqs disabled. */ } else { @@ -2381,6 +2396,11 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) c = rnp_root->completed + 1; rnp->n_nocb_gp_requests[c & 0x1]++; rnp_root->n_nocb_gp_requests[c & 0x1]++; + trace_rcu_nocb_grace_period(rdp->rsp->name, + rnp->gpnum, rnp->completed, + c, rnp->level, + rnp->grplo, rnp->grphi, + "Startedroot"); local_save_flags(flags1); rcu_start_gp(rdp->rsp, flags1); /* Rlses ->lock. */ } @@ -2396,6 +2416,9 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) * Wait for the grace period. Do so interruptibly to avoid messing * up the load average. */ + trace_rcu_nocb_grace_period(rdp->rsp->name, rnp->gpnum, rnp->completed, + c, rnp->level, rnp->grplo, rnp->grphi, + "StartWait"); for (;;) { wait_event_interruptible( rnp->nocb_gp_wq[c & 0x1], @@ -2403,7 +2426,14 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) if (likely(d)) break; flush_signals(current); + trace_rcu_nocb_grace_period(rdp->rsp->name, + rnp->gpnum, rnp->completed, c, + rnp->level, rnp->grplo, rnp->grphi, + "ResumeWait"); } + trace_rcu_nocb_grace_period(rdp->rsp->name, rnp->gpnum, rnp->completed, + c, rnp->level, rnp->grplo, rnp->grphi, + "EndWait"); smp_mb(); /* Ensure that CB invocation happens after GP end. 
*/ } From a488985851cf2facd2227bd982cc2c251df56268 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 3 Dec 2012 08:16:28 -0800 Subject: [PATCH 251/261] rcu: Distinguish "rcuo" kthreads by RCU flavor Currently, the per-no-CBs-CPU kthreads are named "rcuo" followed by the CPU number, for example, "rcuo". This is problematic given that there are either two or three RCU flavors, each of which gets a per-CPU kthread with exactly the same name. This commit therefore introduces a one-letter abbreviation for each RCU flavor, namely 'b' for RCU-bh, 'p' for RCU-preempt, and 's' for RCU-sched. This abbreviation is used to distinguish the "rcuo" kthreads, for example, for CPU 0 we would have "rcuob/0", "rcuop/0", and "rcuos/0". Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Tested-by: Dietmar Eggemann --- Documentation/kernel-parameters.txt | 7 +++++-- init/Kconfig | 13 +++++++------ kernel/rcutree.c | 7 ++++--- kernel/rcutree.h | 1 + kernel/rcutree_plugin.h | 5 +++-- 5 files changed, 20 insertions(+), 13 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4609e81dbc37..a17ba16c8fc8 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2461,9 +2461,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. In kernels built with CONFIG_RCU_NOCB_CPU=y, set the specified list of CPUs to be no-callback CPUs. Invocation of these CPUs' RCU callbacks will - be offloaded to "rcuoN" kthreads created for - that purpose. This reduces OS jitter on the + be offloaded to "rcuox/N" kthreads created for + that purpose, where "x" is "b" for RCU-bh, "p" + for RCU-preempt, and "s" for RCU-sched, and "N" + is the CPU number. This reduces OS jitter on the offloaded CPUs, which can be useful for HPC and + real-time workloads. It can also improve energy efficiency for asymmetric multiprocessors. diff --git a/init/Kconfig b/init/Kconfig index 307499704580..717584064a7e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -666,12 +666,13 @@ config RCU_NOCB_CPU This option offloads callback invocation from the set of CPUs specified at boot time by the rcu_nocbs parameter. - For each such CPU, a kthread ("rcuoN") will be created to - invoke callbacks, where the "N" is the CPU being offloaded. - Nothing prevents this kthread from running on the specified - CPUs, but (1) the kthreads may be preempted between each - callback, and (2) affinity or cgroups can be used to force - the kthreads to run on whatever set of CPUs is desired. + For each such CPU, a kthread ("rcuox/N") will be created to + invoke callbacks, where the "N" is the CPU being offloaded, + and where the "x" is "b" for RCU-bh, "p" for RCU-preempt, and + "s" for RCU-sched. Nothing prevents this kthread from running + on the specified CPUs, but (1) the kthreads may be preempted + between each callback, and (2) affinity or cgroups can be used + to force the kthreads to run on whatever set of CPUs is desired. Say Y here if you want to help to debug reduced OS jitter. Say N here if you are unsure. 
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 433f426c848f..074cb2d974bf 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -64,7 +64,7 @@ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS]; -#define RCU_STATE_INITIALIZER(sname, cr) { \ +#define RCU_STATE_INITIALIZER(sname, sabbr, cr) { \ .level = { &sname##_state.node[0] }, \ .call = cr, \ .fqs_state = RCU_GP_IDLE, \ @@ -76,13 +76,14 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS]; .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \ .name = #sname, \ + .abbr = sabbr, \ } struct rcu_state rcu_sched_state = - RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched); + RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); -struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh); +struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); static struct rcu_state *rcu_state; diff --git a/kernel/rcutree.h b/kernel/rcutree.h index e51373c0b748..b6c2335efbdf 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -443,6 +443,7 @@ struct rcu_state { unsigned long gp_max; /* Maximum GP duration in */ /* jiffies. */ char *name; /* Name of structure. */ + char abbr; /* Abbreviated name. */ struct list_head flavors; /* List of RCU flavors. */ }; diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index e32236e83dda..c0164441ab92 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -111,7 +111,7 @@ static void __init rcu_bootup_announce_oddness(void) #ifdef CONFIG_TREE_PREEMPT_RCU struct rcu_state rcu_preempt_state = - RCU_STATE_INITIALIZER(rcu_preempt, call_rcu); + RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu); DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); static struct rcu_state *rcu_state = &rcu_preempt_state; @@ -2517,7 +2517,8 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) return; for_each_cpu(cpu, rcu_nocb_mask) { rdp = per_cpu_ptr(rsp->rda, cpu); - t = kthread_run(rcu_nocb_kthread, rdp, "rcuo%d", cpu); + t = kthread_run(rcu_nocb_kthread, rdp, + "rcuo%c/%d", rsp->abbr, cpu); BUG_ON(IS_ERR(t)); ACCESS_ONCE(rdp->nocb_kthread) = t; } From 5e44ce35a6ec1a16522fa2099dda27aefd8a584e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 12 Dec 2012 12:35:29 -0800 Subject: [PATCH 252/261] rcu: Export RCU_FAST_NO_HZ parameters to sysfs RCU_FAST_NO_HZ operation is controlled by four compile-time C-preprocessor macros, but some use cases benefit greatly from runtime adjustment, particularly when tuning devices. This commit therefore creates the corresponding sysfs entries. Reported-by: Robin Randhawa Signed-off-by: Paul E. McKenney --- kernel/rcutree_plugin.h | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index c0164441ab92..28185ad18df3 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1617,6 +1617,15 @@ static void rcu_idle_count_callbacks_posted(void) #define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */ #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. 
*/ +static int rcu_idle_flushes = RCU_IDLE_FLUSHES; +module_param(rcu_idle_flushes, int, 0644); +static int rcu_idle_opt_flushes = RCU_IDLE_OPT_FLUSHES; +module_param(rcu_idle_opt_flushes, int, 0644); +static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY; +module_param(rcu_idle_gp_delay, int, 0644); +static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY; +module_param(rcu_idle_lazy_gp_delay, int, 0644); + extern int tick_nohz_enabled; /* @@ -1696,10 +1705,10 @@ int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) } /* Set up for the possibility that RCU will post a timer. */ if (rcu_cpu_has_nonlazy_callbacks(cpu)) { - *delta_jiffies = round_up(RCU_IDLE_GP_DELAY + jiffies, - RCU_IDLE_GP_DELAY) - jiffies; + *delta_jiffies = round_up(rcu_idle_gp_delay + jiffies, + rcu_idle_gp_delay) - jiffies; } else { - *delta_jiffies = jiffies + RCU_IDLE_LAZY_GP_DELAY; + *delta_jiffies = jiffies + rcu_idle_lazy_gp_delay; *delta_jiffies = round_jiffies(*delta_jiffies) - jiffies; } return 0; @@ -1805,11 +1814,11 @@ static void rcu_prepare_for_idle(int cpu) if (rcu_cpu_has_nonlazy_callbacks(cpu)) { trace_rcu_prep_idle("User dyntick with callbacks"); rdtp->idle_gp_timer_expires = - round_up(jiffies + RCU_IDLE_GP_DELAY, - RCU_IDLE_GP_DELAY); + round_up(jiffies + rcu_idle_gp_delay, + rcu_idle_gp_delay); } else if (rcu_cpu_has_callbacks(cpu)) { rdtp->idle_gp_timer_expires = - round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY); + round_jiffies(jiffies + rcu_idle_lazy_gp_delay); trace_rcu_prep_idle("User dyntick with lazy callbacks"); } else { return; @@ -1861,8 +1870,8 @@ static void rcu_prepare_for_idle(int cpu) /* Check and update the ->dyntick_drain sequencing. */ if (rdtp->dyntick_drain <= 0) { /* First time through, initialize the counter. */ - rdtp->dyntick_drain = RCU_IDLE_FLUSHES; - } else if (rdtp->dyntick_drain <= RCU_IDLE_OPT_FLUSHES && + rdtp->dyntick_drain = rcu_idle_flushes; + } else if (rdtp->dyntick_drain <= rcu_idle_opt_flushes && !rcu_pending(cpu) && !local_softirq_pending()) { /* Can we go dyntick-idle despite still having callbacks? */ @@ -1871,11 +1880,11 @@ static void rcu_prepare_for_idle(int cpu) if (rcu_cpu_has_nonlazy_callbacks(cpu)) { trace_rcu_prep_idle("Dyntick with callbacks"); rdtp->idle_gp_timer_expires = - round_up(jiffies + RCU_IDLE_GP_DELAY, - RCU_IDLE_GP_DELAY); + round_up(jiffies + rcu_idle_gp_delay, + rcu_idle_gp_delay); } else { rdtp->idle_gp_timer_expires = - round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY); + round_jiffies(jiffies + rcu_idle_lazy_gp_delay); trace_rcu_prep_idle("Dyntick with lazy callbacks"); } tp = &rdtp->idle_gp_timer; From b11cc5760a9c48c870ad286e8a6d8fdb998fa58d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 17 Dec 2012 14:21:14 -0800 Subject: [PATCH 253/261] rcu: Accelerate RCU callbacks at grace-period end Now that callback acceleration is idempotent, it is safe to accelerate callbacks during grace-period cleanup on any CPUs that the kthread happens to be running on. This commit therefore propagates the completion of the grace period to the per-CPU data structures, and also adds an rcu_advance_cbs() just before the cpu_needs_another_gp() check in order to reduce false-positive grace periods. Signed-off-by: Paul E. 
McKenney --- kernel/rcutree.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 074cb2d974bf..2015bce749f9 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1396,6 +1396,9 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) rcu_for_each_node_breadth_first(rsp, rnp) { raw_spin_lock_irq(&rnp->lock); rnp->completed = rsp->gpnum; + rdp = this_cpu_ptr(rsp->rda); + if (rnp == rdp->mynode) + __rcu_process_gp_end(rsp, rnp, rdp); nocb += rcu_nocb_gp_cleanup(rsp, rnp); raw_spin_unlock_irq(&rnp->lock); cond_resched(); @@ -1408,6 +1411,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) trace_rcu_grace_period(rsp->name, rsp->completed, "end"); rsp->fqs_state = RCU_GP_IDLE; rdp = this_cpu_ptr(rsp->rda); + rcu_advance_cbs(rsp, rnp, rdp); /* Reduce false positives below. */ if (cpu_needs_another_gp(rsp, rdp)) rsp->gp_flags = 1; raw_spin_unlock_irq(&rnp->lock); @@ -1497,6 +1501,15 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) struct rcu_data *rdp = this_cpu_ptr(rsp->rda); struct rcu_node *rnp = rcu_get_root(rsp); + /* + * If there is no grace period in progress right now, any + * callbacks we have up to this point will be satisfied by the + * next grace period. Also, advancing the callbacks reduces the + * probability of false positives from cpu_needs_another_gp() + * resulting in pointless grace periods. So, advance callbacks! + */ + rcu_advance_cbs(rsp, rnp, rdp); + if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) { /* @@ -1509,14 +1522,6 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) return; } - /* - * Because there is no grace period in progress right now, - * any callbacks we have up to this point will be satisfied - * by the next grace period. So this is a good place to - * assign a grace period number to recently posted callbacks. - */ - rcu_accelerate_cbs(rsp, rnp, rdp); - rsp->gp_flags = RCU_GP_FLAG_INIT; raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */ From c0f4dfd4f90f1667d234d21f15153ea09a2eaa66 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 28 Dec 2012 11:30:36 -0800 Subject: [PATCH 254/261] rcu: Make RCU_FAST_NO_HZ take advantage of numbered callbacks Because RCU callbacks are now associated with the number of the grace period that they must wait for, CPUs can now take advance callbacks corresponding to grace periods that ended while a given CPU was in dyntick-idle mode. This eliminates the need to try forcing the RCU state machine while entering idle, thus reducing the CPU intensiveness of RCU_FAST_NO_HZ, which should increase its energy efficiency. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- Documentation/kernel-parameters.txt | 28 +- include/linux/rcupdate.h | 1 + init/Kconfig | 15 +- kernel/rcutree.c | 28 +- kernel/rcutree.h | 12 +- kernel/rcutree_plugin.h | 392 ++++++++-------------------- kernel/rcutree_trace.c | 2 - 7 files changed, 157 insertions(+), 321 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index a17ba16c8fc8..22303b2e74bc 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2490,6 +2490,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted. leaf rcu_node structure. Useful for very large systems. + rcutree.jiffies_till_first_fqs= [KNL,BOOT] + Set delay from grace-period initialization to + first attempt to force quiescent states. 
+ Units are jiffies, minimum value is zero, + and maximum value is HZ. + + rcutree.jiffies_till_next_fqs= [KNL,BOOT] + Set delay between subsequent attempts to force + quiescent states. Units are jiffies, minimum + value is one, and maximum value is HZ. + rcutree.qhimark= [KNL,BOOT] Set threshold of queued RCU callbacks over which batch limiting is disabled. @@ -2504,16 +2515,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted. rcutree.rcu_cpu_stall_timeout= [KNL,BOOT] Set timeout for RCU CPU stall warning messages. - rcutree.jiffies_till_first_fqs= [KNL,BOOT] - Set delay from grace-period initialization to - first attempt to force quiescent states. - Units are jiffies, minimum value is zero, - and maximum value is HZ. + rcutree.rcu_idle_gp_delay= [KNL,BOOT] + Set wakeup interval for idle CPUs that have + RCU callbacks (RCU_FAST_NO_HZ=y). - rcutree.jiffies_till_next_fqs= [KNL,BOOT] - Set delay between subsequent attempts to force - quiescent states. Units are jiffies, minimum - value is one, and maximum value is HZ. + rcutree.rcu_idle_lazy_gp_delay= [KNL,BOOT] + Set wakeup interval for idle CPUs that have + only "lazy" RCU callbacks (RCU_FAST_NO_HZ=y). + Lazy RCU callbacks are those which RCU can + prove do nothing more than free memory. rcutorture.fqs_duration= [KNL,BOOT] Set duration of force_quiescent_state bursts. diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index b758ce17b309..9ed2c9a4de45 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -80,6 +80,7 @@ extern void do_trace_rcu_torture_read(char *rcutorturename, #define UINT_CMP_LT(a, b) (UINT_MAX / 2 < (a) - (b)) #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) +#define ulong2long(a) (*(long *)(&(a))) /* Exported common interfaces */ diff --git a/init/Kconfig b/init/Kconfig index 717584064a7e..a3a2304fa6d2 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -582,13 +582,16 @@ config RCU_FAST_NO_HZ depends on NO_HZ && SMP default n help - This option causes RCU to attempt to accelerate grace periods in - order to allow CPUs to enter dynticks-idle state more quickly. - On the other hand, this option increases the overhead of the - dynticks-idle checking, thus degrading scheduling latency. + This option permits CPUs to enter dynticks-idle state even if + they have RCU callbacks queued, and prevents RCU from waking + these CPUs up more than roughly once every four jiffies (by + default, you can adjust this using the rcutree.rcu_idle_gp_delay + parameter), thus improving energy efficiency. On the other + hand, this option increases the duration of RCU grace periods, + for example, slowing down synchronize_rcu(). - Say Y if energy efficiency is critically important, and you don't - care about real-time response. + Say Y if energy efficiency is critically important, and you + don't care about increased grace-period durations. Say N if you are unsure. diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 2015bce749f9..7b1d7769872a 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -2640,19 +2640,27 @@ static int rcu_pending(int cpu) } /* - * Check to see if any future RCU-related work will need to be done - * by the current CPU, even if none need be done immediately, returning - * 1 if so. + * Return true if the specified CPU has any callback. If all_lazy is + * non-NULL, store an indication of whether all callbacks are lazy. + * (If there are no callbacks, all of them are deemed to be lazy.) 
*/ -static int rcu_cpu_has_callbacks(int cpu) +static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy) { + bool al = true; + bool hc = false; + struct rcu_data *rdp; struct rcu_state *rsp; - /* RCU callbacks either ready or pending? */ - for_each_rcu_flavor(rsp) - if (per_cpu_ptr(rsp->rda, cpu)->nxtlist) - return 1; - return 0; + for_each_rcu_flavor(rsp) { + rdp = per_cpu_ptr(rsp->rda, cpu); + if (rdp->qlen != rdp->qlen_lazy) + al = false; + if (rdp->nxtlist) + hc = true; + } + if (all_lazy) + *all_lazy = al; + return hc; } /* @@ -2871,7 +2879,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; atomic_set(&rdp->dynticks->dynticks, (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); - rcu_prepare_for_idle_init(cpu); raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ /* Add CPU to rcu_node bitmasks. */ @@ -2945,7 +2952,6 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, */ for_each_rcu_flavor(rsp) rcu_cleanup_dying_cpu(rsp); - rcu_cleanup_after_idle(cpu); break; case CPU_DEAD: case CPU_DEAD_FROZEN: diff --git a/kernel/rcutree.h b/kernel/rcutree.h index b6c2335efbdf..96a27f922e92 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -88,18 +88,13 @@ struct rcu_dynticks { int dynticks_nmi_nesting; /* Track NMI nesting level. */ atomic_t dynticks; /* Even value for idle, else odd. */ #ifdef CONFIG_RCU_FAST_NO_HZ - int dyntick_drain; /* Prepare-for-idle state variable. */ - unsigned long dyntick_holdoff; - /* No retries for the jiffy of failure. */ - struct timer_list idle_gp_timer; - /* Wake up CPU sleeping with callbacks. */ - unsigned long idle_gp_timer_expires; - /* When to wake up CPU (for repost). */ - bool idle_first_pass; /* First pass of attempt to go idle? */ + bool all_lazy; /* Are all CPU's CBs lazy? */ unsigned long nonlazy_posted; /* # times non-lazy CBs posted to CPU. */ unsigned long nonlazy_posted_snap; /* idle-period nonlazy_posted snapshot. */ + unsigned long last_accelerate; + /* Last jiffy CBs were accelerated. */ int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */ #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ }; @@ -521,7 +516,6 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, struct rcu_node *rnp); #endif /* #ifdef CONFIG_RCU_BOOST */ static void __cpuinit rcu_prepare_kthreads(int cpu); -static void rcu_prepare_for_idle_init(int cpu); static void rcu_cleanup_after_idle(int cpu); static void rcu_prepare_for_idle(int cpu); static void rcu_idle_count_callbacks_posted(void); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 28185ad18df3..d318f9f18be5 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1543,14 +1543,7 @@ static void __cpuinit rcu_prepare_kthreads(int cpu) int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) { *delta_jiffies = ULONG_MAX; - return rcu_cpu_has_callbacks(cpu); -} - -/* - * Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it. - */ -static void rcu_prepare_for_idle_init(int cpu) -{ + return rcu_cpu_has_callbacks(cpu, NULL); } /* @@ -1587,16 +1580,6 @@ static void rcu_idle_count_callbacks_posted(void) * * The following three proprocessor symbols control this state machine: * - * RCU_IDLE_FLUSHES gives the maximum number of times that we will attempt - * to satisfy RCU. Beyond this point, it is better to incur a periodic - * scheduling-clock interrupt than to loop through the state machine - * at full power. 
- * RCU_IDLE_OPT_FLUSHES gives the number of RCU_IDLE_FLUSHES that are - * optional if RCU does not need anything immediately from this - * CPU, even if this CPU still has RCU callbacks queued. The first - * times through the state machine are mandatory: we need to give - * the state machine a chance to communicate a quiescent state - * to the RCU core. * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted * to sleep in dyntick-idle mode with RCU callbacks pending. This * is sized to be roughly one RCU grace period. Those energy-efficiency @@ -1612,15 +1595,9 @@ static void rcu_idle_count_callbacks_posted(void) * adjustment, they can be converted into kernel config parameters, though * making the state machine smarter might be a better option. */ -#define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */ -#define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */ #define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */ #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ -static int rcu_idle_flushes = RCU_IDLE_FLUSHES; -module_param(rcu_idle_flushes, int, 0644); -static int rcu_idle_opt_flushes = RCU_IDLE_OPT_FLUSHES; -module_param(rcu_idle_opt_flushes, int, 0644); static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY; module_param(rcu_idle_gp_delay, int, 0644); static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY; @@ -1629,178 +1606,97 @@ module_param(rcu_idle_lazy_gp_delay, int, 0644); extern int tick_nohz_enabled; /* - * Does the specified flavor of RCU have non-lazy callbacks pending on - * the specified CPU? Both RCU flavor and CPU are specified by the - * rcu_data structure. + * Try to advance callbacks for all flavors of RCU on the current CPU. + * Afterwards, if there are any callbacks ready for immediate invocation, + * return true. */ -static bool __rcu_cpu_has_nonlazy_callbacks(struct rcu_data *rdp) +static bool rcu_try_advance_all_cbs(void) { - return rdp->qlen != rdp->qlen_lazy; -} + bool cbs_ready = false; + struct rcu_data *rdp; + struct rcu_node *rnp; + struct rcu_state *rsp; -#ifdef CONFIG_TREE_PREEMPT_RCU + for_each_rcu_flavor(rsp) { + rdp = this_cpu_ptr(rsp->rda); + rnp = rdp->mynode; -/* - * Are there non-lazy RCU-preempt callbacks? (There cannot be if there - * is no RCU-preempt in the kernel.) - */ -static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu) -{ - struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); + /* + * Don't bother checking unless a grace period has + * completed since we last checked and there are + * callbacks not yet ready to invoke. + */ + if (rdp->completed != rnp->completed && + rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]) + rcu_process_gp_end(rsp, rdp); - return __rcu_cpu_has_nonlazy_callbacks(rdp); -} - -#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ - -static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu) -{ - return 0; -} - -#endif /* else #ifdef CONFIG_TREE_PREEMPT_RCU */ - -/* - * Does any flavor of RCU have non-lazy callbacks on the specified CPU? 
- */ -static bool rcu_cpu_has_nonlazy_callbacks(int cpu) -{ - return __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_sched_data, cpu)) || - __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_bh_data, cpu)) || - rcu_preempt_cpu_has_nonlazy_callbacks(cpu); + if (cpu_has_callbacks_ready_to_invoke(rdp)) + cbs_ready = true; + } + return cbs_ready; } /* - * Allow the CPU to enter dyntick-idle mode if either: (1) There are no - * callbacks on this CPU, (2) this CPU has not yet attempted to enter - * dyntick-idle mode, or (3) this CPU is in the process of attempting to - * enter dyntick-idle mode. Otherwise, if we have recently tried and failed - * to enter dyntick-idle mode, we refuse to try to enter it. After all, - * it is better to incur scheduling-clock interrupts than to spin - * continuously for the same time duration! + * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready + * to invoke. If the CPU has callbacks, try to advance them. Tell the + * caller to set the timeout based on whether or not there are non-lazy + * callbacks. * - * The delta_jiffies argument is used to store the time when RCU is - * going to need the CPU again if it still has callbacks. The reason - * for this is that rcu_prepare_for_idle() might need to post a timer, - * but if so, it will do so after tick_nohz_stop_sched_tick() has set - * the wakeup time for this CPU. This means that RCU's timer can be - * delayed until the wakeup time, which defeats the purpose of posting - * a timer. + * The caller must have disabled interrupts. */ -int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) +int rcu_needs_cpu(int cpu, unsigned long *dj) { struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); - /* Flag a new idle sojourn to the idle-entry state machine. */ - rdtp->idle_first_pass = 1; + /* Snapshot to detect later posting of non-lazy callback. */ + rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; + /* If no callbacks, RCU doesn't need the CPU. */ - if (!rcu_cpu_has_callbacks(cpu)) { - *delta_jiffies = ULONG_MAX; + if (!rcu_cpu_has_callbacks(cpu, &rdtp->all_lazy)) { + *dj = ULONG_MAX; return 0; } - if (rdtp->dyntick_holdoff == jiffies) { - /* RCU recently tried and failed, so don't try again. */ - *delta_jiffies = 1; + + /* Attempt to advance callbacks. */ + if (rcu_try_advance_all_cbs()) { + /* Some ready to invoke, so initiate later invocation. */ + invoke_rcu_core(); return 1; } - /* Set up for the possibility that RCU will post a timer. */ - if (rcu_cpu_has_nonlazy_callbacks(cpu)) { - *delta_jiffies = round_up(rcu_idle_gp_delay + jiffies, - rcu_idle_gp_delay) - jiffies; + rdtp->last_accelerate = jiffies; + + /* Request timer delay depending on laziness, and round. */ + if (rdtp->all_lazy) { + *dj = round_up(rcu_idle_gp_delay + jiffies, + rcu_idle_gp_delay) - jiffies; } else { - *delta_jiffies = jiffies + rcu_idle_lazy_gp_delay; - *delta_jiffies = round_jiffies(*delta_jiffies) - jiffies; + *dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies; } return 0; } /* - * Handler for smp_call_function_single(). The only point of this - * handler is to wake the CPU up, so the handler does only tracing. - */ -void rcu_idle_demigrate(void *unused) -{ - trace_rcu_prep_idle("Demigrate"); -} - -/* - * Timer handler used to force CPU to start pushing its remaining RCU - * callbacks in the case where it entered dyntick-idle mode with callbacks - * pending. 
The hander doesn't really need to do anything because the - * real work is done upon re-entry to idle, or by the next scheduling-clock - * interrupt should idle not be re-entered. - * - * One special case: the timer gets migrated without awakening the CPU - * on which the timer was scheduled on. In this case, we must wake up - * that CPU. We do so with smp_call_function_single(). - */ -static void rcu_idle_gp_timer_func(unsigned long cpu_in) -{ - int cpu = (int)cpu_in; - - trace_rcu_prep_idle("Timer"); - if (cpu != smp_processor_id()) - smp_call_function_single(cpu, rcu_idle_demigrate, NULL, 0); - else - WARN_ON_ONCE(1); /* Getting here can hang the system... */ -} - -/* - * Initialize the timer used to pull CPUs out of dyntick-idle mode. - */ -static void rcu_prepare_for_idle_init(int cpu) -{ - struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); - - rdtp->dyntick_holdoff = jiffies - 1; - setup_timer(&rdtp->idle_gp_timer, rcu_idle_gp_timer_func, cpu); - rdtp->idle_gp_timer_expires = jiffies - 1; - rdtp->idle_first_pass = 1; -} - -/* - * Clean up for exit from idle. Because we are exiting from idle, there - * is no longer any point to ->idle_gp_timer, so cancel it. This will - * do nothing if this timer is not active, so just cancel it unconditionally. - */ -static void rcu_cleanup_after_idle(int cpu) -{ - struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); - - del_timer(&rdtp->idle_gp_timer); - trace_rcu_prep_idle("Cleanup after idle"); - rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled); -} - -/* - * Check to see if any RCU-related work can be done by the current CPU, - * and if so, schedule a softirq to get it done. This function is part - * of the RCU implementation; it is -not- an exported member of the RCU API. - * - * The idea is for the current CPU to clear out all work required by the - * RCU core for the current grace period, so that this CPU can be permitted - * to enter dyntick-idle mode. In some cases, it will need to be awakened - * at the end of the grace period by whatever CPU ends the grace period. - * This allows CPUs to go dyntick-idle more quickly, and to reduce the - * number of wakeups by a modest integer factor. - * - * Because it is not legal to invoke rcu_process_callbacks() with irqs - * disabled, we do one pass of force_quiescent_state(), then do a - * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked - * later. The ->dyntick_drain field controls the sequencing. + * Prepare a CPU for idle from an RCU perspective. The first major task + * is to sense whether nohz mode has been enabled or disabled via sysfs. + * The second major task is to check to see if a non-lazy callback has + * arrived at a CPU that previously had only lazy callbacks. The third + * major task is to accelerate (that is, assign grace-period numbers to) + * any recently arrived callbacks. * * The caller must have disabled interrupts. */ static void rcu_prepare_for_idle(int cpu) { - struct timer_list *tp; + struct rcu_data *rdp; struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + struct rcu_node *rnp; + struct rcu_state *rsp; int tne; /* Handle nohz enablement switches conservatively. */ tne = ACCESS_ONCE(tick_nohz_enabled); if (tne != rdtp->tick_nohz_enabled_snap) { - if (rcu_cpu_has_callbacks(cpu)) + if (rcu_cpu_has_callbacks(cpu, NULL)) invoke_rcu_core(); /* force nohz to see update. 
*/ rdtp->tick_nohz_enabled_snap = tne; return; @@ -1808,125 +1704,56 @@ static void rcu_prepare_for_idle(int cpu) if (!tne) return; - /* Adaptive-tick mode, where usermode execution is idle to RCU. */ - if (!is_idle_task(current)) { - rdtp->dyntick_holdoff = jiffies - 1; - if (rcu_cpu_has_nonlazy_callbacks(cpu)) { - trace_rcu_prep_idle("User dyntick with callbacks"); - rdtp->idle_gp_timer_expires = - round_up(jiffies + rcu_idle_gp_delay, - rcu_idle_gp_delay); - } else if (rcu_cpu_has_callbacks(cpu)) { - rdtp->idle_gp_timer_expires = - round_jiffies(jiffies + rcu_idle_lazy_gp_delay); - trace_rcu_prep_idle("User dyntick with lazy callbacks"); - } else { - return; - } - tp = &rdtp->idle_gp_timer; - mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); + /* If this is a no-CBs CPU, no callbacks, just return. */ + if (is_nocb_cpu(cpu)) return; - } /* - * If this is an idle re-entry, for example, due to use of - * RCU_NONIDLE() or the new idle-loop tracing API within the idle - * loop, then don't take any state-machine actions, unless the - * momentary exit from idle queued additional non-lazy callbacks. - * Instead, repost the ->idle_gp_timer if this CPU has callbacks - * pending. + * If a non-lazy callback arrived at a CPU having only lazy + * callbacks, invoke RCU core for the side-effect of recalculating + * idle duration on re-entry to idle. */ - if (!rdtp->idle_first_pass && - (rdtp->nonlazy_posted == rdtp->nonlazy_posted_snap)) { - if (rcu_cpu_has_callbacks(cpu)) { - tp = &rdtp->idle_gp_timer; - mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); - } - return; - } - rdtp->idle_first_pass = 0; - rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted - 1; - - /* - * If there are no callbacks on this CPU, enter dyntick-idle mode. - * Also reset state to avoid prejudicing later attempts. - */ - if (!rcu_cpu_has_callbacks(cpu)) { - rdtp->dyntick_holdoff = jiffies - 1; - rdtp->dyntick_drain = 0; - trace_rcu_prep_idle("No callbacks"); - return; - } - - /* - * If in holdoff mode, just return. We will presumably have - * refrained from disabling the scheduling-clock tick. - */ - if (rdtp->dyntick_holdoff == jiffies) { - trace_rcu_prep_idle("In holdoff"); - return; - } - - /* Check and update the ->dyntick_drain sequencing. */ - if (rdtp->dyntick_drain <= 0) { - /* First time through, initialize the counter. */ - rdtp->dyntick_drain = rcu_idle_flushes; - } else if (rdtp->dyntick_drain <= rcu_idle_opt_flushes && - !rcu_pending(cpu) && - !local_softirq_pending()) { - /* Can we go dyntick-idle despite still having callbacks? */ - rdtp->dyntick_drain = 0; - rdtp->dyntick_holdoff = jiffies; - if (rcu_cpu_has_nonlazy_callbacks(cpu)) { - trace_rcu_prep_idle("Dyntick with callbacks"); - rdtp->idle_gp_timer_expires = - round_up(jiffies + rcu_idle_gp_delay, - rcu_idle_gp_delay); - } else { - rdtp->idle_gp_timer_expires = - round_jiffies(jiffies + rcu_idle_lazy_gp_delay); - trace_rcu_prep_idle("Dyntick with lazy callbacks"); - } - tp = &rdtp->idle_gp_timer; - mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); - rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; - return; /* Nothing more to do immediately. */ - } else if (--(rdtp->dyntick_drain) <= 0) { - /* We have hit the limit, so time to give up. */ - rdtp->dyntick_holdoff = jiffies; - trace_rcu_prep_idle("Begin holdoff"); - invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */ - return; - } - - /* - * Do one step of pushing the remaining RCU callbacks through - * the RCU core state machine. 
- */ -#ifdef CONFIG_TREE_PREEMPT_RCU - if (per_cpu(rcu_preempt_data, cpu).nxtlist) { - rcu_preempt_qs(cpu); - force_quiescent_state(&rcu_preempt_state); - } -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ - if (per_cpu(rcu_sched_data, cpu).nxtlist) { - rcu_sched_qs(cpu); - force_quiescent_state(&rcu_sched_state); - } - if (per_cpu(rcu_bh_data, cpu).nxtlist) { - rcu_bh_qs(cpu); - force_quiescent_state(&rcu_bh_state); - } - - /* - * If RCU callbacks are still pending, RCU still needs this CPU. - * So try forcing the callbacks through the grace period. - */ - if (rcu_cpu_has_callbacks(cpu)) { - trace_rcu_prep_idle("More callbacks"); + if (rdtp->all_lazy && + rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) { invoke_rcu_core(); - } else { - trace_rcu_prep_idle("Callbacks drained"); + return; + } + + /* + * If we have not yet accelerated this jiffy, accelerate all + * callbacks on this CPU. + */ + if (rdtp->last_accelerate == jiffies) + return; + rdtp->last_accelerate = jiffies; + for_each_rcu_flavor(rsp) { + rdp = per_cpu_ptr(rsp->rda, cpu); + if (!*rdp->nxttail[RCU_DONE_TAIL]) + continue; + rnp = rdp->mynode; + raw_spin_lock(&rnp->lock); /* irqs already disabled. */ + rcu_accelerate_cbs(rsp, rnp, rdp); + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ + } +} + +/* + * Clean up for exit from idle. Attempt to advance callbacks based on + * any grace periods that elapsed while the CPU was idle, and if any + * callbacks are now ready to invoke, initiate invocation. + */ +static void rcu_cleanup_after_idle(int cpu) +{ + struct rcu_data *rdp; + struct rcu_state *rsp; + + if (is_nocb_cpu(cpu)) + return; + rcu_try_advance_all_cbs(); + for_each_rcu_flavor(rsp) { + rdp = per_cpu_ptr(rsp->rda, cpu); + if (cpu_has_callbacks_ready_to_invoke(rdp)) + invoke_rcu_core(); } } @@ -2034,16 +1861,13 @@ early_initcall(rcu_register_oom_notifier); static void print_cpu_stall_fast_no_hz(char *cp, int cpu) { struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); - struct timer_list *tltp = &rdtp->idle_gp_timer; - char c; + unsigned long nlpd = rdtp->nonlazy_posted - rdtp->nonlazy_posted_snap; - c = rdtp->dyntick_holdoff == jiffies ? 'H' : '.'; - if (timer_pending(tltp)) - sprintf(cp, "drain=%d %c timer=%lu", - rdtp->dyntick_drain, c, tltp->expires - jiffies); - else - sprintf(cp, "drain=%d %c timer not pending", - rdtp->dyntick_drain, c); + sprintf(cp, "last_accelerate: %04lx/%04lx, nonlazy_posted: %ld, %c%c", + rdtp->last_accelerate & 0xffff, jiffies & 0xffff, + ulong2long(nlpd), + rdtp->all_lazy ? 'L' : '.', + rdtp->tick_nohz_enabled_snap ? '.' : 'D'); } #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 0d095dcaa670..49099e81c87b 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -46,8 +46,6 @@ #define RCU_TREE_NONCORE #include "rcutree.h" -#define ulong2long(a) (*(long *)(&(a))) - static int r_open(struct inode *inode, struct file *file, const struct seq_operations *op) { From b92db6cb7efcbd41e469e1d757c47da4865f7622 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 29 Dec 2012 14:52:41 -0800 Subject: [PATCH 255/261] rcu: Rearrange locking in rcu_start_gp() If CPUs are to give prior notice of needed grace periods, it will be necessary to invoke rcu_start_gp() without dropping the root rcu_node structure's ->lock. This commit takes a first step in this direction by moving the release of this lock to the end of rcu_start_gp(). Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. 
McKenney --- kernel/rcutree.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 7b1d7769872a..2c6a9314c7f7 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1521,16 +1521,14 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } - rsp->gp_flags = RCU_GP_FLAG_INIT; - raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */ /* Ensure that CPU is aware of completion of last grace period. */ - rcu_process_gp_end(rsp, rdp); - local_irq_restore(flags); + __rcu_process_gp_end(rsp, rdp->mynode, rdp); /* Wake up rcu_gp_kthread() to start the grace period. */ wake_up(&rsp->gp_wq); + raw_spin_unlock_irqrestore(&rnp->lock, flags); } /* From bd9f0686fc8c9a01c6850b1c611d1c9ad80b86d6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 29 Dec 2012 21:51:20 -0800 Subject: [PATCH 256/261] rcu: Repurpose no-CBs event tracing to future-GP events Dyntick-idle CPUs need to be able to pre-announce their need for grace periods. This can be done using something similar to the mechanism used by no-CB CPUs to announce their need for grace periods. This commit moves in this direction by renaming the no-CBs grace-period event tracing to suit the new future-grace-period needs. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 16 +++++----- kernel/rcutree_plugin.h | 62 ++++++++++++++++++++------------------ 2 files changed, 40 insertions(+), 38 deletions(-) diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index cdfed6d386eb..59ebcc89f148 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -72,10 +72,10 @@ TRACE_EVENT(rcu_grace_period, ); /* - * Tracepoint for no-callbacks grace-period events. The caller should - * pull the data from the rcu_node structure, other than rcuname, which - * comes from the rcu_state structure, and event, which is one of the - * following: + * Tracepoint for future grace-period events, including those for no-callbacks + * CPUs. The caller should pull the data from the rcu_node structure, + * other than rcuname, which comes from the rcu_state structure, and event, + * which is one of the following: * * "Startleaf": Request a nocb grace period based on leaf-node data. * "Startedleaf": Leaf-node start proved sufficient. @@ -87,7 +87,7 @@ TRACE_EVENT(rcu_grace_period, * "Cleanup": Clean up rcu_node structure after previous GP. * "CleanupMore": Clean up, and another no-CB GP is needed. 
*/ -TRACE_EVENT(rcu_nocb_grace_period, +TRACE_EVENT(rcu_future_grace_period, TP_PROTO(char *rcuname, unsigned long gpnum, unsigned long completed, unsigned long c, u8 level, int grplo, int grphi, @@ -653,9 +653,9 @@ TRACE_EVENT(rcu_barrier, #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) #define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \ qsmask) do { } while (0) -#define trace_rcu_nocb_grace_period(rcuname, gpnum, completed, c, \ - level, grplo, grphi, event) \ - do { } while (0) +#define trace_rcu_future_grace_period(rcuname, gpnum, completed, c, \ + level, grplo, grphi, event) \ + do { } while (0) #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0) #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index d318f9f18be5..df50502eca2c 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2034,9 +2034,9 @@ static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) wake_up_all(&rnp->nocb_gp_wq[c & 0x1]); rnp->n_nocb_gp_requests[c & 0x1] = 0; needmore = rnp->n_nocb_gp_requests[(c + 1) & 0x1]; - trace_rcu_nocb_grace_period(rsp->name, rnp->gpnum, rnp->completed, - c, rnp->level, rnp->grplo, rnp->grphi, - needmore ? "CleanupMore" : "Cleanup"); + trace_rcu_future_grace_period(rsp->name, rnp->gpnum, rnp->completed, + c, rnp->level, rnp->grplo, rnp->grphi, + needmore ? "CleanupMore" : "Cleanup"); return needmore; } @@ -2183,9 +2183,9 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) /* Count our request for a grace period. */ rnp->n_nocb_gp_requests[c & 0x1]++; - trace_rcu_nocb_grace_period(rdp->rsp->name, rnp->gpnum, rnp->completed, - c, rnp->level, rnp->grplo, rnp->grphi, - "Startleaf"); + trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, + rnp->completed, c, rnp->level, + rnp->grplo, rnp->grphi, "Startleaf"); if (rnp->gpnum != rnp->completed) { @@ -2194,10 +2194,10 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) * is in progress, so we are done. When this grace * period ends, our request will be acted upon. */ - trace_rcu_nocb_grace_period(rdp->rsp->name, - rnp->gpnum, rnp->completed, c, - rnp->level, rnp->grplo, rnp->grphi, - "Startedleaf"); + trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, + rnp->completed, c, rnp->level, + rnp->grplo, rnp->grphi, + "Startedleaf"); raw_spin_unlock_irqrestore(&rnp->lock, flags); } else { @@ -2209,11 +2209,12 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) if (rnp != rnp_root) raw_spin_lock(&rnp_root->lock); /* irqs disabled. */ if (rnp_root->gpnum != rnp_root->completed) { - trace_rcu_nocb_grace_period(rdp->rsp->name, - rnp->gpnum, rnp->completed, - c, rnp->level, - rnp->grplo, rnp->grphi, - "Startedleafroot"); + trace_rcu_future_grace_period(rdp->rsp->name, + rnp->gpnum, + rnp->completed, + c, rnp->level, + rnp->grplo, rnp->grphi, + "Startedleafroot"); raw_spin_unlock(&rnp_root->lock); /* irqs disabled. 
*/ } else { @@ -2229,11 +2230,12 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) c = rnp_root->completed + 1; rnp->n_nocb_gp_requests[c & 0x1]++; rnp_root->n_nocb_gp_requests[c & 0x1]++; - trace_rcu_nocb_grace_period(rdp->rsp->name, - rnp->gpnum, rnp->completed, - c, rnp->level, - rnp->grplo, rnp->grphi, - "Startedroot"); + trace_rcu_future_grace_period(rdp->rsp->name, + rnp->gpnum, + rnp->completed, + c, rnp->level, + rnp->grplo, rnp->grphi, + "Startedroot"); local_save_flags(flags1); rcu_start_gp(rdp->rsp, flags1); /* Rlses ->lock. */ } @@ -2249,9 +2251,9 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) * Wait for the grace period. Do so interruptibly to avoid messing * up the load average. */ - trace_rcu_nocb_grace_period(rdp->rsp->name, rnp->gpnum, rnp->completed, - c, rnp->level, rnp->grplo, rnp->grphi, - "StartWait"); + trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, + rnp->completed, c, rnp->level, + rnp->grplo, rnp->grphi, "StartWait"); for (;;) { wait_event_interruptible( rnp->nocb_gp_wq[c & 0x1], @@ -2259,14 +2261,14 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) if (likely(d)) break; flush_signals(current); - trace_rcu_nocb_grace_period(rdp->rsp->name, - rnp->gpnum, rnp->completed, c, - rnp->level, rnp->grplo, rnp->grphi, - "ResumeWait"); + trace_rcu_future_grace_period(rdp->rsp->name, + rnp->gpnum, rnp->completed, c, + rnp->level, rnp->grplo, + rnp->grphi, "ResumeWait"); } - trace_rcu_nocb_grace_period(rdp->rsp->name, rnp->gpnum, rnp->completed, - c, rnp->level, rnp->grplo, rnp->grphi, - "EndWait"); + trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, + rnp->completed, c, rnp->level, + rnp->grplo, rnp->grphi, "EndWait"); smp_mb(); /* Ensure that CB invocation happens after GP end. */ } From b8462084a2a88a6a0489f9bb7d8b1bb95bc455ab Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 29 Dec 2012 22:04:18 -0800 Subject: [PATCH 257/261] rcu: Push lock release to rcu_start_gp()'s callers If CPUs are to give prior notice of needed grace periods, it will be necessary to invoke rcu_start_gp() without dropping the root rcu_node structure's ->lock. This commit takes a second step in this direction by moving the release of this lock to rcu_start_gp()'s callers. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 24 ++++++++++-------------- kernel/rcutree_plugin.h | 5 ++--- 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 2c6a9314c7f7..0d532950baa3 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1487,16 +1487,14 @@ static int __noreturn rcu_gp_kthread(void *arg) /* * Start a new RCU grace period if warranted, re-initializing the hierarchy * in preparation for detecting the next grace period. The caller must hold - * the root node's ->lock, which is released before return. Hard irqs must - * be disabled. + * the root node's ->lock and hard irqs must be disabled. * * Note that it is legal for a dying CPU (which is marked as offline) to * invoke this function. This can happen when the dying CPU reports its * quiescent state. 
*/ static void -rcu_start_gp(struct rcu_state *rsp, unsigned long flags) - __releases(rcu_get_root(rsp)->lock) +rcu_start_gp(struct rcu_state *rsp) { struct rcu_data *rdp = this_cpu_ptr(rsp->rda); struct rcu_node *rnp = rcu_get_root(rsp); @@ -1510,15 +1508,13 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) */ rcu_advance_cbs(rsp, rnp, rdp); - if (!rsp->gp_kthread || - !cpu_needs_another_gp(rsp, rdp)) { + if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) { /* * Either we have not yet spawned the grace-period * task, this CPU does not need another grace period, * or a grace period is already in progress. * Either way, don't start a new grace period. */ - raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } rsp->gp_flags = RCU_GP_FLAG_INIT; @@ -1528,15 +1524,14 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) /* Wake up rcu_gp_kthread() to start the grace period. */ wake_up(&rsp->gp_wq); - raw_spin_unlock_irqrestore(&rnp->lock, flags); } /* * Report a full set of quiescent states to the specified rcu_state * data structure. This involves cleaning up after the prior grace * period and letting rcu_start_gp() start up the next grace period - * if one is needed. Note that the caller must hold rnp->lock, as - * required by rcu_start_gp(), which will release it. + * if one is needed. Note that the caller must hold rnp->lock, which + * is released before return. */ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) __releases(rcu_get_root(rsp)->lock) @@ -2134,7 +2129,8 @@ __rcu_process_callbacks(struct rcu_state *rsp) local_irq_save(flags); if (cpu_needs_another_gp(rsp, rdp)) { raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */ - rcu_start_gp(rsp, flags); /* releases above lock */ + rcu_start_gp(rsp); + raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags); } else { local_irq_restore(flags); } @@ -2214,11 +2210,11 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, /* Start a new grace period if one not already started. */ if (!rcu_gp_in_progress(rsp)) { - unsigned long nestflag; struct rcu_node *rnp_root = rcu_get_root(rsp); - raw_spin_lock_irqsave(&rnp_root->lock, nestflag); - rcu_start_gp(rsp, nestflag); /* rlses rnp_root->lock */ + raw_spin_lock(&rnp_root->lock); + rcu_start_gp(rsp); + raw_spin_unlock(&rnp_root->lock); } else { /* Give the grace period a kick. */ rdp->blimit = LONG_MAX; diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index df50502eca2c..073ded26e259 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2174,7 +2174,6 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) unsigned long c; bool d; unsigned long flags; - unsigned long flags1; struct rcu_node *rnp = rdp->mynode; struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); @@ -2236,8 +2235,8 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) c, rnp->level, rnp->grplo, rnp->grphi, "Startedroot"); - local_save_flags(flags1); - rcu_start_gp(rdp->rsp, flags1); /* Rlses ->lock. */ + rcu_start_gp(rdp->rsp); + raw_spin_unlock(&rnp->lock); } /* Clean up locking and irq state. */ From 8b425aa8f1acfe48aed919c7aadff2ed290fe969 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 30 Dec 2012 13:06:35 -0800 Subject: [PATCH 258/261] rcu: Rename n_nocb_gp_requests to need_future_gp CPUs going idle need to be able to indicate their need for future grace periods. A mechanism for doing this already exists for no-callbacks CPUs, so the idea is to re-use that mechanism. 
This commit therefore moves the ->n_nocb_gp_requests field of the rcu_node structure out from under the CONFIG_RCU_NOCB_CPU #ifdef and renames it to ->need_future_gp. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree.h | 4 ++-- kernel/rcutree_plugin.h | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 96a27f922e92..034b524594bd 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -194,9 +194,9 @@ struct rcu_node { #ifdef CONFIG_RCU_NOCB_CPU wait_queue_head_t nocb_gp_wq[2]; /* Place for rcu_nocb_kthread() to wait GP. */ - int n_nocb_gp_requests[2]; - /* Counts of upcoming no-CB GP requests. */ #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ + int need_future_gp[2]; + /* Counts of upcoming no-CB GP requests. */ raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp; } ____cacheline_internodealigned_in_smp; diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 073ded26e259..f3f0020b5b58 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2018,7 +2018,7 @@ static int rcu_nocb_needs_gp(struct rcu_state *rsp) { struct rcu_node *rnp = rcu_get_root(rsp); - return rnp->n_nocb_gp_requests[(ACCESS_ONCE(rnp->completed) + 1) & 0x1]; + return rnp->need_future_gp[(ACCESS_ONCE(rnp->completed) + 1) & 0x1]; } /* @@ -2032,8 +2032,8 @@ static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) int needmore; wake_up_all(&rnp->nocb_gp_wq[c & 0x1]); - rnp->n_nocb_gp_requests[c & 0x1] = 0; - needmore = rnp->n_nocb_gp_requests[(c + 1) & 0x1]; + rnp->need_future_gp[c & 0x1] = 0; + needmore = rnp->need_future_gp[(c + 1) & 0x1]; trace_rcu_future_grace_period(rsp->name, rnp->gpnum, rnp->completed, c, rnp->level, rnp->grplo, rnp->grphi, needmore ? "CleanupMore" : "Cleanup"); @@ -2041,7 +2041,7 @@ static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) } /* - * Set the root rcu_node structure's ->n_nocb_gp_requests field + * Set the root rcu_node structure's ->need_future_gp field * based on the sum of those of all rcu_node structures. This does * double-count the root rcu_node structure's requests, but this * is necessary to handle the possibility of a rcu_nocb_kthread() @@ -2050,7 +2050,7 @@ static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) */ static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq) { - rnp->n_nocb_gp_requests[(rnp->completed + 1) & 0x1] += nrq; + rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq; } static void rcu_init_one_nocb(struct rcu_node *rnp) @@ -2181,7 +2181,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) c = rnp->completed + 2; /* Count our request for a grace period. */ - rnp->n_nocb_gp_requests[c & 0x1]++; + rnp->need_future_gp[c & 0x1]++; trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, rnp->completed, c, rnp->level, rnp->grplo, rnp->grphi, "Startleaf"); @@ -2225,10 +2225,10 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) * Adjust counters accordingly and start the * needed grace period. */ - rnp->n_nocb_gp_requests[c & 0x1]--; + rnp->need_future_gp[c & 0x1]--; c = rnp_root->completed + 1; - rnp->n_nocb_gp_requests[c & 0x1]++; - rnp_root->n_nocb_gp_requests[c & 0x1]++; + rnp->need_future_gp[c & 0x1]++; + rnp_root->need_future_gp[c & 0x1]++; trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, rnp->completed, From 0446be489795d8bb994125a916ef03211f539e54 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Sun, 30 Dec 2012 15:21:01 -0800 Subject: [PATCH 259/261] rcu: Abstract rcu_start_future_gp() from rcu_nocb_wait_gp() CPUs going idle will need to record the need for a future grace period, but won't actually need to block waiting on it. This commit therefore splits rcu_start_future_gp(), which does the recording, from rcu_nocb_wait_gp(), which now invokes rcu_start_future_gp() to do the recording, after which rcu_nocb_wait_gp() does the waiting. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 123 ++++++++++++++++++++++++++++++++++++++-- kernel/rcutree.h | 2 +- kernel/rcutree_plugin.h | 104 ++++----------------------------- 3 files changed, 130 insertions(+), 99 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 0d532950baa3..f4b23f16677a 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -224,6 +224,7 @@ static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS; module_param(jiffies_till_first_fqs, ulong, 0644); module_param(jiffies_till_next_fqs, ulong, 0644); +static void rcu_start_gp(struct rcu_state *rsp); static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)); static void force_quiescent_state(struct rcu_state *rsp); static int rcu_pending(int cpu); @@ -1074,6 +1075,120 @@ static unsigned long rcu_cbs_completed(struct rcu_state *rsp, return rnp->completed + 2; } +/* + * Trace-event helper function for rcu_start_future_gp() and + * rcu_nocb_wait_gp(). + */ +static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp, + unsigned long c, char *s) +{ + trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, + rnp->completed, c, rnp->level, + rnp->grplo, rnp->grphi, s); +} + +/* + * Start some future grace period, as needed to handle newly arrived + * callbacks. The required future grace periods are recorded in each + * rcu_node structure's ->need_future_gp field. + * + * The caller must hold the specified rcu_node structure's ->lock. + */ +static unsigned long __maybe_unused +rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp) +{ + unsigned long c; + int i; + struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); + + /* + * Pick up grace-period number for new callbacks. If this + * grace period is already marked as needed, return to the caller. + */ + c = rcu_cbs_completed(rdp->rsp, rnp); + trace_rcu_future_gp(rnp, rdp, c, "Startleaf"); + if (rnp->need_future_gp[c & 0x1]) { + trace_rcu_future_gp(rnp, rdp, c, "Prestartleaf"); + return c; + } + + /* + * If either this rcu_node structure or the root rcu_node structure + * believe that a grace period is in progress, then we must wait + * for the one following, which is in "c". Because our request + * will be noticed at the end of the current grace period, we don't + * need to explicitly start one. + */ + if (rnp->gpnum != rnp->completed || + ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) { + rnp->need_future_gp[c & 0x1]++; + trace_rcu_future_gp(rnp, rdp, c, "Startedleaf"); + return c; + } + + /* + * There might be no grace period in progress. If we don't already + * hold it, acquire the root rcu_node structure's lock in order to + * start one (if needed). + */ + if (rnp != rnp_root) + raw_spin_lock(&rnp_root->lock); + + /* + * Get a new grace-period number. If there really is no grace + * period in progress, it will be smaller than the one we obtained + * earlier. Adjust callbacks as needed. Note that even no-CBs + * CPUs have a ->nxtcompleted[] array, so no no-CBs checks needed. 
+ */ + c = rcu_cbs_completed(rdp->rsp, rnp_root); + for (i = RCU_DONE_TAIL; i < RCU_NEXT_TAIL; i++) + if (ULONG_CMP_LT(c, rdp->nxtcompleted[i])) + rdp->nxtcompleted[i] = c; + + /* + * If the needed for the required grace period is already + * recorded, trace and leave. + */ + if (rnp_root->need_future_gp[c & 0x1]) { + trace_rcu_future_gp(rnp, rdp, c, "Prestartedroot"); + goto unlock_out; + } + + /* Record the need for the future grace period. */ + rnp_root->need_future_gp[c & 0x1]++; + + /* If a grace period is not already in progress, start one. */ + if (rnp_root->gpnum != rnp_root->completed) { + trace_rcu_future_gp(rnp, rdp, c, "Startedleafroot"); + } else { + trace_rcu_future_gp(rnp, rdp, c, "Startedroot"); + rcu_start_gp(rdp->rsp); + } +unlock_out: + if (rnp != rnp_root) + raw_spin_unlock(&rnp_root->lock); + return c; +} + +/* + * Clean up any old requests for the just-ended grace period. Also return + * whether any additional grace periods have been requested. Also invoke + * rcu_nocb_gp_cleanup() in order to wake up any no-callbacks kthreads + * waiting for this grace period to complete. + */ +static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) +{ + int c = rnp->completed; + int needmore; + struct rcu_data *rdp = this_cpu_ptr(rsp->rda); + + rcu_nocb_gp_cleanup(rsp, rnp); + rnp->need_future_gp[c & 0x1] = 0; + needmore = rnp->need_future_gp[(c + 1) & 0x1]; + trace_rcu_future_gp(rnp, rdp, c, needmore ? "CleanupMore" : "Cleanup"); + return needmore; +} + /* * If there is room, assign a ->completed number to any callbacks on * this CPU that have not already been assigned. Also accelerate any @@ -1312,9 +1427,9 @@ static int rcu_gp_init(struct rcu_state *rsp) rdp = this_cpu_ptr(rsp->rda); rcu_preempt_check_blocked_tasks(rnp); rnp->qsmask = rnp->qsmaskinit; - rnp->gpnum = rsp->gpnum; + ACCESS_ONCE(rnp->gpnum) = rsp->gpnum; WARN_ON_ONCE(rnp->completed != rsp->completed); - rnp->completed = rsp->completed; + ACCESS_ONCE(rnp->completed) = rsp->completed; if (rnp == rdp->mynode) rcu_start_gp_per_cpu(rsp, rnp, rdp); rcu_preempt_boost_start_gp(rnp); @@ -1395,11 +1510,11 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) */ rcu_for_each_node_breadth_first(rsp, rnp) { raw_spin_lock_irq(&rnp->lock); - rnp->completed = rsp->gpnum; + ACCESS_ONCE(rnp->completed) = rsp->gpnum; rdp = this_cpu_ptr(rsp->rda); if (rnp == rdp->mynode) __rcu_process_gp_end(rsp, rnp, rdp); - nocb += rcu_nocb_gp_cleanup(rsp, rnp); + nocb += rcu_future_gp_cleanup(rsp, rnp); raw_spin_unlock_irq(&rnp->lock); cond_resched(); } diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 034b524594bd..c11b46c33159 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -526,7 +526,7 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp); static void increment_cpu_stall_ticks(void); static int rcu_nocb_needs_gp(struct rcu_state *rsp); static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq); -static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp); +static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp); static void rcu_init_one_nocb(struct rcu_node *rnp); static bool is_nocb_cpu(int cpu); static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index f3f0020b5b58..723af5f707f0 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2022,22 +2022,12 @@ static int rcu_nocb_needs_gp(struct rcu_state *rsp) } /* - * Clean up this rcu_node structure's no-CBs state at the end of 
- * a grace period, and also return whether any no-CBs CPU associated - * with this rcu_node structure needs another grace period. + * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended + * grace period. */ -static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) +static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) { - int c = rnp->completed; - int needmore; - - wake_up_all(&rnp->nocb_gp_wq[c & 0x1]); - rnp->need_future_gp[c & 0x1] = 0; - needmore = rnp->need_future_gp[(c + 1) & 0x1]; - trace_rcu_future_grace_period(rsp->name, rnp->gpnum, rnp->completed, - c, rnp->level, rnp->grplo, rnp->grphi, - needmore ? "CleanupMore" : "Cleanup"); - return needmore; + wake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]); } /* @@ -2175,84 +2165,16 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) bool d; unsigned long flags; struct rcu_node *rnp = rdp->mynode; - struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); raw_spin_lock_irqsave(&rnp->lock, flags); - c = rnp->completed + 2; - - /* Count our request for a grace period. */ - rnp->need_future_gp[c & 0x1]++; - trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, - rnp->completed, c, rnp->level, - rnp->grplo, rnp->grphi, "Startleaf"); - - if (rnp->gpnum != rnp->completed) { - - /* - * This rcu_node structure believes that a grace period - * is in progress, so we are done. When this grace - * period ends, our request will be acted upon. - */ - trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, - rnp->completed, c, rnp->level, - rnp->grplo, rnp->grphi, - "Startedleaf"); - raw_spin_unlock_irqrestore(&rnp->lock, flags); - - } else { - - /* - * Might not be a grace period, check root rcu_node - * structure to see if we must start one. - */ - if (rnp != rnp_root) - raw_spin_lock(&rnp_root->lock); /* irqs disabled. */ - if (rnp_root->gpnum != rnp_root->completed) { - trace_rcu_future_grace_period(rdp->rsp->name, - rnp->gpnum, - rnp->completed, - c, rnp->level, - rnp->grplo, rnp->grphi, - "Startedleafroot"); - raw_spin_unlock(&rnp_root->lock); /* irqs disabled. */ - } else { - - /* - * No grace period, so we need to start one. - * The good news is that we can wait for exactly - * one grace period instead of part of the current - * grace period and all of the next grace period. - * Adjust counters accordingly and start the - * needed grace period. - */ - rnp->need_future_gp[c & 0x1]--; - c = rnp_root->completed + 1; - rnp->need_future_gp[c & 0x1]++; - rnp_root->need_future_gp[c & 0x1]++; - trace_rcu_future_grace_period(rdp->rsp->name, - rnp->gpnum, - rnp->completed, - c, rnp->level, - rnp->grplo, rnp->grphi, - "Startedroot"); - rcu_start_gp(rdp->rsp); - raw_spin_unlock(&rnp->lock); - } - - /* Clean up locking and irq state. */ - if (rnp != rnp_root) - raw_spin_unlock_irqrestore(&rnp->lock, flags); - else - local_irq_restore(flags); - } + c = rcu_start_future_gp(rnp, rdp); + raw_spin_unlock_irqrestore(&rnp->lock, flags); /* * Wait for the grace period. Do so interruptibly to avoid messing * up the load average. 
*/ - trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, - rnp->completed, c, rnp->level, - rnp->grplo, rnp->grphi, "StartWait"); + trace_rcu_future_gp(rnp, rdp, c, "StartWait"); for (;;) { wait_event_interruptible( rnp->nocb_gp_wq[c & 0x1], @@ -2260,14 +2182,9 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) if (likely(d)) break; flush_signals(current); - trace_rcu_future_grace_period(rdp->rsp->name, - rnp->gpnum, rnp->completed, c, - rnp->level, rnp->grplo, - rnp->grphi, "ResumeWait"); + trace_rcu_future_gp(rnp, rdp, c, "ResumeWait"); } - trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, - rnp->completed, c, rnp->level, - rnp->grplo, rnp->grphi, "EndWait"); + trace_rcu_future_gp(rnp, rdp, c, "EndWait"); smp_mb(); /* Ensure that CB invocation happens after GP end. */ } @@ -2375,9 +2292,8 @@ static int rcu_nocb_needs_gp(struct rcu_state *rsp) return 0; } -static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) +static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) { - return 0; } static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq) From 910ee45db2f4837c8440e770474758493ab94bf7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 31 Dec 2012 02:24:21 -0800 Subject: [PATCH 260/261] rcu: Make rcu_accelerate_cbs() note need for future grace periods Now that rcu_start_future_gp() has been abstracted from rcu_nocb_wait_gp(), rcu_accelerate_cbs() can invoke rcu_start_future_gp() so as to register the need for any future grace periods needed by a CPU about to enter dyntick-idle mode. This commit makes this change. Note that some refactoring of rcu_start_gp() is carried out to avoid recursion and subsequent self-deadlocks. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 50 +++++++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index f4b23f16677a..9cb91e4885af 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -224,7 +224,8 @@ static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS; module_param(jiffies_till_first_fqs, ulong, 0644); module_param(jiffies_till_next_fqs, ulong, 0644); -static void rcu_start_gp(struct rcu_state *rsp); +static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, + struct rcu_data *rdp); static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)); static void force_quiescent_state(struct rcu_state *rsp); static int rcu_pending(int cpu); @@ -1162,7 +1163,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp) trace_rcu_future_gp(rnp, rdp, c, "Startedleafroot"); } else { trace_rcu_future_gp(rnp, rdp, c, "Startedroot"); - rcu_start_gp(rdp->rsp); + rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp); } unlock_out: if (rnp != rnp_root) @@ -1248,6 +1249,8 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL]; rdp->nxtcompleted[i] = c; } + /* Record any needed additional grace periods. */ + rcu_start_future_gp(rnp, rdp); /* Trace depending on how much we were able to accelerate. */ if (!*rdp->nxttail[RCU_WAIT_TAIL]) @@ -1609,20 +1612,9 @@ static int __noreturn rcu_gp_kthread(void *arg) * quiescent state. 
*/ static void -rcu_start_gp(struct rcu_state *rsp) +rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, + struct rcu_data *rdp) { - struct rcu_data *rdp = this_cpu_ptr(rsp->rda); - struct rcu_node *rnp = rcu_get_root(rsp); - - /* - * If there is no grace period in progress right now, any - * callbacks we have up to this point will be satisfied by the - * next grace period. Also, advancing the callbacks reduces the - * probability of false positives from cpu_needs_another_gp() - * resulting in pointless grace periods. So, advance callbacks! - */ - rcu_advance_cbs(rsp, rnp, rdp); - if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) { /* * Either we have not yet spawned the grace-period @@ -1634,13 +1626,35 @@ rcu_start_gp(struct rcu_state *rsp) } rsp->gp_flags = RCU_GP_FLAG_INIT; - /* Ensure that CPU is aware of completion of last grace period. */ - __rcu_process_gp_end(rsp, rdp->mynode, rdp); - /* Wake up rcu_gp_kthread() to start the grace period. */ wake_up(&rsp->gp_wq); } +/* + * Similar to rcu_start_gp_advanced(), but also advance the calling CPU's + * callbacks. Note that rcu_start_gp_advanced() cannot do this because it + * is invoked indirectly from rcu_advance_cbs(), which would result in + * endless recursion -- or would do so if it wasn't for the self-deadlock + * that is encountered beforehand. + */ +static void +rcu_start_gp(struct rcu_state *rsp) +{ + struct rcu_data *rdp = this_cpu_ptr(rsp->rda); + struct rcu_node *rnp = rcu_get_root(rsp); + + /* + * If there is no grace period in progress right now, any + * callbacks we have up to this point will be satisfied by the + * next grace period. Also, advancing the callbacks reduces the + * probability of false positives from cpu_needs_another_gp() + * resulting in pointless grace periods. So, advance callbacks + * then start the grace period! + */ + rcu_advance_cbs(rsp, rnp, rdp); + rcu_start_gp_advanced(rsp, rnp, rdp); +} + /* * Report a full set of quiescent states to the specified rcu_state * data structure. This involves cleaning up after the prior grace From 49717cb40410fe4b563968680ff7c513967504c6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 11 Apr 2013 08:07:11 -0700 Subject: [PATCH 261/261] kthread: Document ways of reducing OS jitter due to per-CPU kthreads The Linux kernel uses a number of per-CPU kthreads, any of which might contribute to OS jitter at any time. The usual approach to normal kthreads, namely to bind them to a "housekeeping" CPU, does not work with these kthreads because they cannot operate correctly if moved to some other CPU. This commit therefore lists ways of controlling OS jitter from the Linux kernel's per-CPU kthreads. It also lists some ways of diagnosing excessive jitter. Signed-off-by: Paul E. 
McKenney Cc: Frederic Weisbecker Cc: Steven Rostedt Cc: Borislav Petkov Cc: Arjan van de Ven Cc: Kevin Hilman Cc: Christoph Lameter Cc: Thomas Gleixner Cc: Olivier Baetz Cc: Pradeep Satyanarayana Reviewed-by: Randy Dunlap Reviewed-by: Borislav Petkov --- Documentation/kernel-per-CPU-kthreads.txt | 202 ++++++++++++++++++++++ 1 file changed, 202 insertions(+) create mode 100644 Documentation/kernel-per-CPU-kthreads.txt diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt new file mode 100644 index 000000000000..cbf7ae412da4 --- /dev/null +++ b/Documentation/kernel-per-CPU-kthreads.txt @@ -0,0 +1,202 @@ +REDUCING OS JITTER DUE TO PER-CPU KTHREADS + +This document lists per-CPU kthreads in the Linux kernel and presents +options to control their OS jitter. Note that non-per-CPU kthreads are +not listed here. To reduce OS jitter from non-per-CPU kthreads, bind +them to a "housekeeping" CPU dedicated to such work. + + +REFERENCES + +o Documentation/IRQ-affinity.txt: Binding interrupts to sets of CPUs. + +o Documentation/cgroups: Using cgroups to bind tasks to sets of CPUs. + +o man taskset: Using the taskset command to bind tasks to sets + of CPUs. + +o man sched_setaffinity: Using the sched_setaffinity() system + call to bind tasks to sets of CPUs. + +o /sys/devices/system/cpu/cpuN/online: Control CPU N's hotplug state, + writing "0" to offline and "1" to online. + +o In order to locate kernel-generated OS jitter on CPU N: + + cd /sys/kernel/debug/tracing + echo 1 > max_graph_depth # Increase the "1" for more detail + echo function_graph > current_tracer + # run workload + cat per_cpu/cpuN/trace + + +KTHREADS + +Name: ehca_comp/%u +Purpose: Periodically process Infiniband-related work. +To reduce its OS jitter, do any of the following: +1. Don't use eHCA Infiniband hardware, instead choosing hardware + that does not require per-CPU kthreads. This will prevent these + kthreads from being created in the first place. (This will + work for most people, as this hardware, though important, is + relatively old and is produced in relatively low unit volumes.) +2. Do all eHCA-Infiniband-related work on other CPUs, including + interrupts. +3. Rework the eHCA driver so that its per-CPU kthreads are + provisioned only on selected CPUs. + + +Name: irq/%d-%s +Purpose: Handle threaded interrupts. +To reduce its OS jitter, do the following: +1. Use irq affinity to force the irq threads to execute on + some other CPU. + +Name: kcmtpd_ctr_%d +Purpose: Handle Bluetooth work. +To reduce its OS jitter, do one of the following: +1. Don't use Bluetooth, in which case these kthreads won't be + created in the first place. +2. Use irq affinity to force Bluetooth-related interrupts to + occur on some other CPU and furthermore initiate all + Bluetooth activity on some other CPU. + +Name: ksoftirqd/%u +Purpose: Execute softirq handlers when threaded or when under heavy load. +To reduce its OS jitter, each softirq vector must be handled +separately as follows: +TIMER_SOFTIRQ: Do all of the following: +1. To the extent possible, keep the CPU out of the kernel when it + is non-idle, for example, by avoiding system calls and by forcing + both kernel threads and interrupts to execute elsewhere. +2. Build with CONFIG_HOTPLUG_CPU=y. After boot completes, force + the CPU offline, then bring it back online. This forces + recurring timers to migrate elsewhere. If you are concerned + with multiple CPUs, force them all offline before bringing the + first one back online. 
Once you have onlined the CPUs in question, + do not offline any other CPUs, because doing so could force the + timer back onto one of the CPUs in question. +NET_TX_SOFTIRQ and NET_RX_SOFTIRQ: Do all of the following: +1. Force networking interrupts onto other CPUs. +2. Initiate any network I/O on other CPUs. +3. Once your application has started, prevent CPU-hotplug operations + from being initiated from tasks that might run on the CPU to + be de-jittered. (It is OK to force this CPU offline and then + bring it back online before you start your application.) +BLOCK_SOFTIRQ: Do all of the following: +1. Force block-device interrupts onto some other CPU. +2. Initiate any block I/O on other CPUs. +3. Once your application has started, prevent CPU-hotplug operations + from being initiated from tasks that might run on the CPU to + be de-jittered. (It is OK to force this CPU offline and then + bring it back online before you start your application.) +BLOCK_IOPOLL_SOFTIRQ: Do all of the following: +1. Force block-device interrupts onto some other CPU. +2. Initiate any block I/O and block-I/O polling on other CPUs. +3. Once your application has started, prevent CPU-hotplug operations + from being initiated from tasks that might run on the CPU to + be de-jittered. (It is OK to force this CPU offline and then + bring it back online before you start your application.) +TASKLET_SOFTIRQ: Do one or more of the following: +1. Avoid use of drivers that use tasklets. (Such drivers will contain + calls to things like tasklet_schedule().) +2. Convert all drivers that you must use from tasklets to workqueues. +3. Force interrupts for drivers using tasklets onto other CPUs, + and also do I/O involving these drivers on other CPUs. +SCHED_SOFTIRQ: Do all of the following: +1. Avoid sending scheduler IPIs to the CPU to be de-jittered, + for example, ensure that at most one runnable kthread is present + on that CPU. If a thread that expects to run on the de-jittered + CPU awakens, the scheduler will send an IPI that can result in + a subsequent SCHED_SOFTIRQ. +2. Build with CONFIG_RCU_NOCB_CPU=y, CONFIG_RCU_NOCB_CPU_ALL=y, + CONFIG_NO_HZ_FULL=y, and, in addition, ensure that the CPU + to be de-jittered is marked as an adaptive-ticks CPU using the + "nohz_full=" boot parameter. This reduces the number of + scheduler-clock interrupts that the de-jittered CPU receives, + minimizing its chances of being selected to do the load balancing + work that runs in SCHED_SOFTIRQ context. +3. To the extent possible, keep the CPU out of the kernel when it + is non-idle, for example, by avoiding system calls and by + forcing both kernel threads and interrupts to execute elsewhere. + This further reduces the number of scheduler-clock interrupts + received by the de-jittered CPU. +HRTIMER_SOFTIRQ: Do all of the following: +1. To the extent possible, keep the CPU out of the kernel when it + is non-idle. For example, avoid system calls and force both + kernel threads and interrupts to execute elsewhere. +2. Build with CONFIG_HOTPLUG_CPU=y. Once boot completes, force the + CPU offline, then bring it back online. This forces recurring + timers to migrate elsewhere. If you are concerned with multiple + CPUs, force them all offline before bringing the first one + back online. Once you have onlined the CPUs in question, do not + offline any other CPUs, because doing so could force the timer + back onto one of the CPUs in question. +RCU_SOFTIRQ: Do at least one of the following: +1. 
Offload callbacks and keep the CPU in either dyntick-idle or + adaptive-ticks state by doing all of the following: + a. Build with CONFIG_RCU_NOCB_CPU=y, CONFIG_RCU_NOCB_CPU_ALL=y, + CONFIG_NO_HZ_FULL=y, and, in addition, ensure that the CPU + to be de-jittered is marked as an adaptive-ticks CPU using + the "nohz_full=" boot parameter. Bind the rcuo kthreads + to housekeeping CPUs, which can tolerate OS jitter. + b. To the extent possible, keep the CPU out of the kernel + when it is non-idle, for example, by avoiding system + calls and by forcing both kernel threads and interrupts + to execute elsewhere. +2. Enable RCU to do its processing remotely via dyntick-idle by + doing all of the following: + a. Build with CONFIG_NO_HZ=y and CONFIG_RCU_FAST_NO_HZ=y. + b. Ensure that the CPU goes idle frequently, allowing other + CPUs to detect that it has passed through an RCU quiescent + state. If the kernel is built with CONFIG_NO_HZ_FULL=y, + userspace execution also allows other CPUs to detect that + the CPU in question has passed through a quiescent state. + c. To the extent possible, keep the CPU out of the kernel + when it is non-idle, for example, by avoiding system + calls and by forcing both kernel threads and interrupts + to execute elsewhere. + +Name: rcuc/%u +Purpose: Execute RCU callbacks in CONFIG_RCU_BOOST=y kernels. +To reduce its OS jitter, do at least one of the following: +1. Build the kernel with CONFIG_PREEMPT=n. This prevents these + kthreads from being created in the first place, and also obviates + the need for RCU priority boosting. This approach is feasible + for workloads that do not require high degrees of responsiveness. +2. Build the kernel with CONFIG_RCU_BOOST=n. This prevents these + kthreads from being created in the first place. This approach + is feasible only if your workload never requires RCU priority + boosting, for example, if you ensure frequent idle time on all + CPUs that might execute within the kernel. +3. Build with CONFIG_RCU_NOCB_CPU=y and CONFIG_RCU_NOCB_CPU_ALL=y, + which offloads all RCU callbacks to kthreads that can be moved + off of CPUs susceptible to OS jitter. This approach prevents the + rcuc/%u kthreads from having any work to do, so that they are + never awakened. +4. Ensure that the CPU never enters the kernel, and, in particular, + avoid initiating any CPU hotplug operations on this CPU. This is + another way of preventing any callbacks from being queued on the + CPU, again preventing the rcuc/%u kthreads from having any work + to do. + +Name: rcuob/%d, rcuop/%d, and rcuos/%d +Purpose: Offload RCU callbacks from the corresponding CPU. +To reduce its OS jitter, do at least one of the following: +1. Use affinity, cgroups, or other mechanism to force these kthreads + to execute on some other CPU. +2. Build with CONFIG_RCU_NOCB_CPU=n, which will prevent these + kthreads from being created in the first place. However, please + note that this will not eliminate OS jitter, but will instead + shift it to RCU_SOFTIRQ. + +Name: watchdog/%u +Purpose: Detect software lockups on each CPU. +To reduce its OS jitter, do at least one of the following: +1. Build with CONFIG_LOCKUP_DETECTOR=n, which will prevent these + kthreads from being created in the first place. +2. Echo a zero to /proc/sys/kernel/watchdog to disable the + watchdog timer. +3. Echo a large number to /proc/sys/kernel/watchdog_thresh in + order to reduce the frequency of OS jitter due to the watchdog + timer down to a level that is acceptable for your workload.
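
Illustrative sketch (not part of the patch above): the recurring advice to force interrupts and user tasks off the CPU to be de-jittered can be carried out roughly as follows, assuming CPU 3 is the CPU to be de-jittered, CPUs 0-2 are housekeeping CPUs, and IRQ 42, PID 1234, and my_housekeeping_daemon are placeholder values:

  # Allow IRQ 42 only on CPUs 0-2 (hex cpumask 0x7), keeping it off CPU 3.
  echo 7 > /proc/irq/42/smp_affinity
  # Make newly requested IRQs default to CPUs 0-2 as well.
  echo 7 > /proc/irq/default_smp_affinity
  # Launch housekeeping work bound to CPUs 0-2 only.
  taskset -c 0-2 my_housekeeping_daemon
  # Re-pin an already-running task (PID 1234) onto CPUs 0-2.
  taskset -p -c 0-2 1234

The same effect can be obtained with cgroup cpusets or the sched_setaffinity() system call, per the REFERENCES section above.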
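
A similarly hedged sketch of the CPU-hotplug cycle recommended for TIMER_SOFTIRQ and HRTIMER_SOFTIRQ, together with the watchdog adjustments, again assuming CPU 3 is the CPU to be de-jittered and a kernel built with CONFIG_HOTPLUG_CPU=y:

  # After boot completes, force CPU 3 offline and back online so that
  # recurring timers migrate to other CPUs.
  echo 0 > /sys/devices/system/cpu/cpu3/online
  echo 1 > /sys/devices/system/cpu/cpu3/online
  # Either disable the lockup watchdog entirely ...
  echo 0 > /proc/sys/kernel/watchdog
  # ... or raise its threshold (in seconds) so it fires far less often.
  echo 60 > /proc/sys/kernel/watchdog_thresh

Avoid offlining further CPUs afterward, since doing so could force the timers back onto CPU 3.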