forked from Minki/linux
3a6bfbc91d
The arch_mutex_cpu_relax() function, introduced by 34b133f, is
hacky and ugly. It was added a few years ago to address the fact
that common cpu_relax() calls include yielding on s390, and thus
impact the optimistic spinning functionality of mutexes. Nowadays
we use this function well beyond mutexes: rwsem, qrwlock, mcs and
lockref. Since the macro that defines the call is in the mutex header,
any users must include mutex.h and the naming is misleading as well.
This patch (i) renames the call to cpu_relax_lowlatency ("relax, but
only if you can do it with very low latency") and (ii) defines it in
each arch's asm/processor.h local header, just like for regular cpu_relax
functions. On all archs, except s390, cpu_relax_lowlatency is simply cpu_relax,
and thus we can take it out of mutex.h. While this can seem redundant,
I believe it is a good choice as it allows us to move out arch specific
logic from generic locking primitives and enables future(?) archs to
transparently define it, similarly to System Z.
Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Anton Blanchard <anton@samba.org>
Cc: Aurelien Jacquiot <a-jacquiot@ti.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bharat Bhushan <r65777@freescale.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chen Liqin <liqin.linux@gmail.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: David Howells <dhowells@redhat.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>
Cc: Dominik Dingel <dingel@linux.vnet.ibm.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
Cc: Haavard Skinnemoen <hskinnemoen@gmail.com>
Cc: Hans-Christian Egtvedt <egtvedt@samfundet.no>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: James E.J. Bottomley <jejb@parisc-linux.org>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Jesper Nilsson <jesper.nilsson@axis.com>
Cc: Joe Perches <joe@perches.com>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Joseph Myers <joseph@codesourcery.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Koichi Yasutake <yasutake.koichi@jp.panasonic.com>
Cc: Lennox Wu <lennox.wu@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mark Salter <msalter@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Mikael Starvik <starvik@axis.com>
Cc: Nicolas Pitre <nico@linaro.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Paul Burton <paul.burton@imgtec.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Qais Yousef <qais.yousef@imgtec.com>
Cc: Qiaowei Ren <qiaowei.ren@intel.com>
Cc: Rafael Wysocki <rafael.j.wysocki@intel.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Steven Miao <realmz6@gmail.com>
Cc: Steven Rostedt <srostedt@redhat.com>
Cc: Stratos Karafotis <stratosk@semaphore.gr>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vasily Kulikov <segoon@openwall.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Vineet Gupta <Vineet.Gupta1@synopsys.com>
Cc: Waiman Long <Waiman.Long@hp.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Wolfram Sang <wsa@the-dreams.de>
Cc: adi-buildroot-devel@lists.sourceforge.net
Cc: linux390@de.ibm.com
Cc: linux-alpha@vger.kernel.org
Cc: linux-am33-list@redhat.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-c6x-dev@linux-c6x.org
Cc: linux-cris-kernel@axis.com
Cc: linux-hexagon@vger.kernel.org
Cc: linux-ia64@vger.kernel.org
Cc: linux@lists.openrisc.net
Cc: linux-m32r-ja@ml.linux-m32r.org
Cc: linux-m32r@ml.linux-m32r.org
Cc: linux-m68k@lists.linux-m68k.org
Cc: linux-metag@vger.kernel.org
Cc: linux-mips@linux-mips.org
Cc: linux-parisc@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-s390@vger.kernel.org
Cc: linux-sh@vger.kernel.org
Cc: linux-xtensa@linux-xtensa.org
Cc: sparclinux@vger.kernel.org
Link: http://lkml.kernel.org/r/1404079773.2619.4.camel@buesod1.americas.hpqcorp.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
209 lines
4.9 KiB
C
209 lines
4.9 KiB
C
#include <linux/percpu.h>
|
|
#include <linux/sched.h>
|
|
#include "mcs_spinlock.h"
|
|
|
|
#ifdef CONFIG_SMP
|
|
|
|
/*
|
|
* An MCS like lock especially tailored for optimistic spinning for sleeping
|
|
* lock implementations (mutex, rwsem, etc).
|
|
*
|
|
* Using a single mcs node per CPU is safe because sleeping locks should not be
|
|
* called from interrupt context and we have preemption disabled while
|
|
* spinning.
|
|
*/
|
|
static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node);
|
|
|
|
/*
 * We use the value 0 to represent "no CPU", thus the encoded value
 * will be the CPU number incremented by 1.
 *
 * The encoded value is what gets stored in @lock->tail and @node->cpu.
 */
static inline int encode_cpu(int cpu_nr)
{
	return cpu_nr + 1;
}
|
|
|
|
static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val)
|
|
{
|
|
int cpu_nr = encoded_cpu_val - 1;
|
|
|
|
return per_cpu_ptr(&osq_node, cpu_nr);
|
|
}
|
|
|
|
/*
|
|
* Get a stable @node->next pointer, either for unlock() or unqueue() purposes.
|
|
* Can return NULL in case we were the last queued and we updated @lock instead.
|
|
*/
|
|
static inline struct optimistic_spin_node *
|
|
osq_wait_next(struct optimistic_spin_queue *lock,
|
|
struct optimistic_spin_node *node,
|
|
struct optimistic_spin_node *prev)
|
|
{
|
|
struct optimistic_spin_node *next = NULL;
|
|
int curr = encode_cpu(smp_processor_id());
|
|
int old;
|
|
|
|
/*
|
|
* If there is a prev node in queue, then the 'old' value will be
|
|
* the prev node's CPU #, else it's set to OSQ_UNLOCKED_VAL since if
|
|
* we're currently last in queue, then the queue will then become empty.
|
|
*/
|
|
old = prev ? prev->cpu : OSQ_UNLOCKED_VAL;
|
|
|
|
for (;;) {
|
|
if (atomic_read(&lock->tail) == curr &&
|
|
atomic_cmpxchg(&lock->tail, curr, old) == curr) {
|
|
/*
|
|
* We were the last queued, we moved @lock back. @prev
|
|
* will now observe @lock and will complete its
|
|
* unlock()/unqueue().
|
|
*/
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* We must xchg() the @node->next value, because if we were to
|
|
* leave it in, a concurrent unlock()/unqueue() from
|
|
* @node->next might complete Step-A and think its @prev is
|
|
* still valid.
|
|
*
|
|
* If the concurrent unlock()/unqueue() wins the race, we'll
|
|
* wait for either @lock to point to us, through its Step-B, or
|
|
* wait for a new @node->next from its Step-C.
|
|
*/
|
|
if (node->next) {
|
|
next = xchg(&node->next, NULL);
|
|
if (next)
|
|
break;
|
|
}
|
|
|
|
cpu_relax_lowlatency();
|
|
}
|
|
|
|
return next;
|
|
}
|
|
|
|
bool osq_lock(struct optimistic_spin_queue *lock)
|
|
{
|
|
struct optimistic_spin_node *node = this_cpu_ptr(&osq_node);
|
|
struct optimistic_spin_node *prev, *next;
|
|
int curr = encode_cpu(smp_processor_id());
|
|
int old;
|
|
|
|
node->locked = 0;
|
|
node->next = NULL;
|
|
node->cpu = curr;
|
|
|
|
old = atomic_xchg(&lock->tail, curr);
|
|
if (old == OSQ_UNLOCKED_VAL)
|
|
return true;
|
|
|
|
prev = decode_cpu(old);
|
|
node->prev = prev;
|
|
ACCESS_ONCE(prev->next) = node;
|
|
|
|
/*
|
|
* Normally @prev is untouchable after the above store; because at that
|
|
* moment unlock can proceed and wipe the node element from stack.
|
|
*
|
|
* However, since our nodes are static per-cpu storage, we're
|
|
* guaranteed their existence -- this allows us to apply
|
|
* cmpxchg in an attempt to undo our queueing.
|
|
*/
|
|
|
|
while (!smp_load_acquire(&node->locked)) {
|
|
/*
|
|
* If we need to reschedule bail... so we can block.
|
|
*/
|
|
if (need_resched())
|
|
goto unqueue;
|
|
|
|
cpu_relax_lowlatency();
|
|
}
|
|
return true;
|
|
|
|
unqueue:
|
|
/*
|
|
* Step - A -- stabilize @prev
|
|
*
|
|
* Undo our @prev->next assignment; this will make @prev's
|
|
* unlock()/unqueue() wait for a next pointer since @lock points to us
|
|
* (or later).
|
|
*/
|
|
|
|
for (;;) {
|
|
if (prev->next == node &&
|
|
cmpxchg(&prev->next, node, NULL) == node)
|
|
break;
|
|
|
|
/*
|
|
* We can only fail the cmpxchg() racing against an unlock(),
|
|
* in which case we should observe @node->locked becomming
|
|
* true.
|
|
*/
|
|
if (smp_load_acquire(&node->locked))
|
|
return true;
|
|
|
|
cpu_relax_lowlatency();
|
|
|
|
/*
|
|
* Or we race against a concurrent unqueue()'s step-B, in which
|
|
* case its step-C will write us a new @node->prev pointer.
|
|
*/
|
|
prev = ACCESS_ONCE(node->prev);
|
|
}
|
|
|
|
/*
|
|
* Step - B -- stabilize @next
|
|
*
|
|
* Similar to unlock(), wait for @node->next or move @lock from @node
|
|
* back to @prev.
|
|
*/
|
|
|
|
next = osq_wait_next(lock, node, prev);
|
|
if (!next)
|
|
return false;
|
|
|
|
/*
|
|
* Step - C -- unlink
|
|
*
|
|
* @prev is stable because its still waiting for a new @prev->next
|
|
* pointer, @next is stable because our @node->next pointer is NULL and
|
|
* it will wait in Step-A.
|
|
*/
|
|
|
|
ACCESS_ONCE(next->prev) = prev;
|
|
ACCESS_ONCE(prev->next) = next;
|
|
|
|
return false;
|
|
}
|
|
|
|
void osq_unlock(struct optimistic_spin_queue *lock)
|
|
{
|
|
struct optimistic_spin_node *node, *next;
|
|
int curr = encode_cpu(smp_processor_id());
|
|
|
|
/*
|
|
* Fast path for the uncontended case.
|
|
*/
|
|
if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr))
|
|
return;
|
|
|
|
/*
|
|
* Second most likely case.
|
|
*/
|
|
node = this_cpu_ptr(&osq_node);
|
|
next = xchg(&node->next, NULL);
|
|
if (next) {
|
|
ACCESS_ONCE(next->locked) = 1;
|
|
return;
|
|
}
|
|
|
|
next = osq_wait_next(lock, node, NULL);
|
|
if (next)
|
|
ACCESS_ONCE(next->locked) = 1;
|
|
}
|
|
|
|
#endif
|
|
|