forked from Minki/linux
3a6bfbc91d
The arch_mutex_cpu_relax() function, introduced by 34b133f, is
hacky and ugly. It was added a few years ago to address the fact
that common cpu_relax() calls include yielding on s390, and thus
impact the optimistic spinning functionality of mutexes. Nowadays
we use this function well beyond mutexes: rwsem, qrwlock, mcs and
lockref. Since the macro that defines the call is in the mutex header,
any users must include mutex.h and the naming is misleading as well.
This patch (i) renames the call to cpu_relax_lowlatency ("relax, but
only if you can do it with very low latency") and (ii) defines it in
each arch's asm/processor.h local header, just like for regular cpu_relax
functions. On all archs, except s390, cpu_relax_lowlatency is simply cpu_relax,
and thus we can take it out of mutex.h. While this can seem redundant,
I believe it is a good choice as it allows us to move out arch specific
logic from generic locking primitives and enables future(?) archs to
transparently define it, similarly to System Z.
Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Anton Blanchard <anton@samba.org>
Cc: Aurelien Jacquiot <a-jacquiot@ti.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bharat Bhushan <r65777@freescale.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chen Liqin <liqin.linux@gmail.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: David Howells <dhowells@redhat.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>
Cc: Dominik Dingel <dingel@linux.vnet.ibm.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
Cc: Haavard Skinnemoen <hskinnemoen@gmail.com>
Cc: Hans-Christian Egtvedt <egtvedt@samfundet.no>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: James E.J. Bottomley <jejb@parisc-linux.org>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Jesper Nilsson <jesper.nilsson@axis.com>
Cc: Joe Perches <joe@perches.com>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Joseph Myers <joseph@codesourcery.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Koichi Yasutake <yasutake.koichi@jp.panasonic.com>
Cc: Lennox Wu <lennox.wu@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mark Salter <msalter@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Mikael Starvik <starvik@axis.com>
Cc: Nicolas Pitre <nico@linaro.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Paul Burton <paul.burton@imgtec.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Qais Yousef <qais.yousef@imgtec.com>
Cc: Qiaowei Ren <qiaowei.ren@intel.com>
Cc: Rafael Wysocki <rafael.j.wysocki@intel.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Steven Miao <realmz6@gmail.com>
Cc: Steven Rostedt <srostedt@redhat.com>
Cc: Stratos Karafotis <stratosk@semaphore.gr>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vasily Kulikov <segoon@openwall.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Vineet Gupta <Vineet.Gupta1@synopsys.com>
Cc: Waiman Long <Waiman.Long@hp.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Wolfram Sang <wsa@the-dreams.de>
Cc: adi-buildroot-devel@lists.sourceforge.net
Cc: linux390@de.ibm.com
Cc: linux-alpha@vger.kernel.org
Cc: linux-am33-list@redhat.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-c6x-dev@linux-c6x.org
Cc: linux-cris-kernel@axis.com
Cc: linux-hexagon@vger.kernel.org
Cc: linux-ia64@vger.kernel.org
Cc: linux@lists.openrisc.net
Cc: linux-m32r-ja@ml.linux-m32r.org
Cc: linux-m32r@ml.linux-m32r.org
Cc: linux-m68k@lists.linux-m68k.org
Cc: linux-metag@vger.kernel.org
Cc: linux-mips@linux-mips.org
Cc: linux-parisc@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-s390@vger.kernel.org
Cc: linux-sh@vger.kernel.org
Cc: linux-xtensa@linux-xtensa.org
Cc: sparclinux@vger.kernel.org
Link: http://lkml.kernel.org/r/1404079773.2619.4.camel@buesod1.americas.hpqcorp.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
131 lines
3.7 KiB
C
131 lines
3.7 KiB
C
/*
 * MCS lock defines
 *
 * This file contains the main data structure and API definitions of MCS lock.
 *
 * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock
 * with the desirable properties of being fair, and with each cpu trying
 * to acquire the lock spinning on a local variable.
 * It avoids expensive cache bouncings that common test-and-set spin-lock
 * implementations incur.
 */
|
|
#ifndef __LINUX_MCS_SPINLOCK_H
|
|
#define __LINUX_MCS_SPINLOCK_H
|
|
|
|
#include <asm/mcs_spinlock.h>
|
|
|
|
/*
 * Per-waiter queue node of an MCS lock.
 *
 * @next:   node queued directly behind this one, or NULL if there is none
 *          (yet); written by the successor in mcs_spin_lock().
 * @locked: 1 if lock acquired; this is the variable the node's owner spins
 *          on while waiting for the lock to be handed over.
 */
struct mcs_spinlock {
	struct mcs_spinlock *next;
	int locked; /* 1 if lock acquired */
};
|
|
|
|
#ifndef arch_mcs_spin_lock_contended
/*
 * Generic fallback, used only when the architecture has not supplied its
 * own arch_mcs_spin_lock_contended().
 *
 * Busy-waits until the int pointed to by @l reads as non-zero, calling
 * cpu_relax_lowlatency() between polls.
 *
 * Using smp_load_acquire() provides a memory barrier that ensures
 * subsequent operations happen after the lock is acquired.
 */
#define arch_mcs_spin_lock_contended(l)					\
do {									\
	while (!(smp_load_acquire(l)))					\
		cpu_relax_lowlatency();					\
} while (0)
#endif
|
|
|
|
#ifndef arch_mcs_spin_unlock_contended
/*
 * Generic fallback, used only when the architecture has not supplied its
 * own arch_mcs_spin_unlock_contended().
 *
 * Hands the lock over by storing 1 to the int pointed to by @l.
 *
 * smp_store_release() provides a memory barrier to ensure all
 * operations in the critical section have been completed before
 * unlocking.
 */
#define arch_mcs_spin_unlock_contended(l)				\
	smp_store_release((l), 1)
#endif
|
|
|
|
/*
 * Note: the smp_load_acquire/smp_store_release pair is not
 * sufficient to form a full memory barrier across
 * cpus for many architectures (except x86) for mcs_unlock and mcs_lock.
 * For applications that need a full barrier across multiple cpus
 * with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be
 * used after mcs_lock.
 */
|
|
|
|
/*
|
|
* In order to acquire the lock, the caller should declare a local node and
|
|
* pass a reference of the node to this function in addition to the lock.
|
|
* If the lock has already been acquired, then this will proceed to spin
|
|
* on this node->locked until the previous lock holder sets the node->locked
|
|
* in mcs_spin_unlock().
|
|
*
|
|
* We don't inline mcs_spin_lock() so that perf can correctly account for the
|
|
* time spent in this lock function.
|
|
*/
|
|
static inline
|
|
void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
|
|
{
|
|
struct mcs_spinlock *prev;
|
|
|
|
/* Init node */
|
|
node->locked = 0;
|
|
node->next = NULL;
|
|
|
|
prev = xchg(lock, node);
|
|
if (likely(prev == NULL)) {
|
|
/*
|
|
* Lock acquired, don't need to set node->locked to 1. Threads
|
|
* only spin on its own node->locked value for lock acquisition.
|
|
* However, since this thread can immediately acquire the lock
|
|
* and does not proceed to spin on its own node->locked, this
|
|
* value won't be used. If a debug mode is needed to
|
|
* audit lock status, then set node->locked value here.
|
|
*/
|
|
return;
|
|
}
|
|
ACCESS_ONCE(prev->next) = node;
|
|
|
|
/* Wait until the lock holder passes the lock down. */
|
|
arch_mcs_spin_lock_contended(&node->locked);
|
|
}
|
|
|
|
/*
|
|
* Releases the lock. The caller should pass in the corresponding node that
|
|
* was used to acquire the lock.
|
|
*/
|
|
static inline
|
|
void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
|
|
{
|
|
struct mcs_spinlock *next = ACCESS_ONCE(node->next);
|
|
|
|
if (likely(!next)) {
|
|
/*
|
|
* Release the lock by setting it to NULL
|
|
*/
|
|
if (likely(cmpxchg(lock, node, NULL) == node))
|
|
return;
|
|
/* Wait until the next pointer is set */
|
|
while (!(next = ACCESS_ONCE(node->next)))
|
|
cpu_relax_lowlatency();
|
|
}
|
|
|
|
/* Pass lock to next waiter. */
|
|
arch_mcs_spin_unlock_contended(&next->locked);
|
|
}
|
|
|
|
/*
 * Cancellable version of the MCS lock above.
 *
 * Intended for adaptive spinning of sleeping locks:
 * mutex_lock()/rwsem_down_{read,write}() etc.
 */
|
|
|
|
/*
 * Queue node used by osq_lock()/osq_unlock().
 *
 * @next:   link to the following node.
 * @prev:   link to the preceding node.
 * @locked: 1 if lock acquired.
 * @cpu:    encoded CPU # value.
 */
struct optimistic_spin_node {
	struct optimistic_spin_node *next;
	struct optimistic_spin_node *prev;
	int locked; /* 1 if lock acquired */
	int cpu; /* encoded CPU # value */
};
|
|
|
|
/*
 * Forward declaration: only pointers to the queue are used here, so the
 * full definition is not needed and the prototypes below do not introduce
 * a type scoped to their parameter lists.
 */
struct optimistic_spin_queue;

/* Entry points of the cancellable queue lock; defined outside this header. */
extern bool osq_lock(struct optimistic_spin_queue *lock);
extern void osq_unlock(struct optimistic_spin_queue *lock);
|
|
|
|
#endif /* __LINUX_MCS_SPINLOCK_H */
|