[PATCH] pi-futex: futex_lock_pi/futex_unlock_pi support
This adds the actual pi-futex implementation, based on rt-mutexes. [dino@in.ibm.com: fix an oops-causing race] Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Arjan van de Ven <arjan@linux.intel.com> Signed-off-by: Dinakar Guniguntala <dino@in.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
0cdbee9920
commit
c87e2837be
@ -12,6 +12,9 @@
|
|||||||
#define FUTEX_REQUEUE 3
|
#define FUTEX_REQUEUE 3
|
||||||
#define FUTEX_CMP_REQUEUE 4
|
#define FUTEX_CMP_REQUEUE 4
|
||||||
#define FUTEX_WAKE_OP 5
|
#define FUTEX_WAKE_OP 5
|
||||||
|
#define FUTEX_LOCK_PI 6
|
||||||
|
#define FUTEX_UNLOCK_PI 7
|
||||||
|
#define FUTEX_TRYLOCK_PI 8
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Support for robust futexes: the kernel cleans up held futexes at
|
* Support for robust futexes: the kernel cleans up held futexes at
|
||||||
@ -97,10 +100,14 @@ extern int handle_futex_death(u32 __user *uaddr, struct task_struct *curr);
|
|||||||
|
|
||||||
#ifdef CONFIG_FUTEX
|
#ifdef CONFIG_FUTEX
|
||||||
extern void exit_robust_list(struct task_struct *curr);
|
extern void exit_robust_list(struct task_struct *curr);
|
||||||
|
extern void exit_pi_state_list(struct task_struct *curr);
|
||||||
#else
|
#else
|
||||||
static inline void exit_robust_list(struct task_struct *curr)
|
static inline void exit_robust_list(struct task_struct *curr)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
static inline void exit_pi_state_list(struct task_struct *curr)
|
||||||
|
{
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define FUTEX_OP_SET 0 /* *(int *)UADDR2 = OPARG; */
|
#define FUTEX_OP_SET 0 /* *(int *)UADDR2 = OPARG; */
|
||||||
|
@ -84,6 +84,7 @@ struct sched_param {
|
|||||||
#include <asm/processor.h>
|
#include <asm/processor.h>
|
||||||
|
|
||||||
struct exec_domain;
|
struct exec_domain;
|
||||||
|
struct futex_pi_state;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* List of flags we want to share for kernel threads,
|
* List of flags we want to share for kernel threads,
|
||||||
@ -915,6 +916,8 @@ struct task_struct {
|
|||||||
#ifdef CONFIG_COMPAT
|
#ifdef CONFIG_COMPAT
|
||||||
struct compat_robust_list_head __user *compat_robust_list;
|
struct compat_robust_list_head __user *compat_robust_list;
|
||||||
#endif
|
#endif
|
||||||
|
struct list_head pi_state_list;
|
||||||
|
struct futex_pi_state *pi_state_cache;
|
||||||
|
|
||||||
atomic_t fs_excl; /* holding fs exclusive resources */
|
atomic_t fs_excl; /* holding fs exclusive resources */
|
||||||
struct rcu_head rcu;
|
struct rcu_head rcu;
|
||||||
|
@ -925,6 +925,14 @@ fastcall NORET_TYPE void do_exit(long code)
|
|||||||
mpol_free(tsk->mempolicy);
|
mpol_free(tsk->mempolicy);
|
||||||
tsk->mempolicy = NULL;
|
tsk->mempolicy = NULL;
|
||||||
#endif
|
#endif
|
||||||
|
/*
|
||||||
|
* This must happen late, after the PID is not
|
||||||
|
* hashed anymore:
|
||||||
|
*/
|
||||||
|
if (unlikely(!list_empty(&tsk->pi_state_list)))
|
||||||
|
exit_pi_state_list(tsk);
|
||||||
|
if (unlikely(current->pi_state_cache))
|
||||||
|
kfree(current->pi_state_cache);
|
||||||
/*
|
/*
|
||||||
* If DEBUG_MUTEXES is on, make sure we are holding no locks:
|
* If DEBUG_MUTEXES is on, make sure we are holding no locks:
|
||||||
*/
|
*/
|
||||||
|
@ -1092,6 +1092,9 @@ static task_t *copy_process(unsigned long clone_flags,
|
|||||||
#ifdef CONFIG_COMPAT
|
#ifdef CONFIG_COMPAT
|
||||||
p->compat_robust_list = NULL;
|
p->compat_robust_list = NULL;
|
||||||
#endif
|
#endif
|
||||||
|
INIT_LIST_HEAD(&p->pi_state_list);
|
||||||
|
p->pi_state_cache = NULL;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* sigaltstack should be cleared when sharing the same VM
|
* sigaltstack should be cleared when sharing the same VM
|
||||||
*/
|
*/
|
||||||
|
823
kernel/futex.c
823
kernel/futex.c
@ -12,6 +12,10 @@
|
|||||||
* (C) Copyright 2006 Red Hat Inc, All Rights Reserved
|
* (C) Copyright 2006 Red Hat Inc, All Rights Reserved
|
||||||
* Thanks to Thomas Gleixner for suggestions, analysis and fixes.
|
* Thanks to Thomas Gleixner for suggestions, analysis and fixes.
|
||||||
*
|
*
|
||||||
|
* PI-futex support started by Ingo Molnar and Thomas Gleixner
|
||||||
|
* Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
|
||||||
|
* Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
|
||||||
|
*
|
||||||
* Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
|
* Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
|
||||||
* enough at me, Linus for the original (flawed) idea, Matthew
|
* enough at me, Linus for the original (flawed) idea, Matthew
|
||||||
* Kirkwood for proof-of-concept implementation.
|
* Kirkwood for proof-of-concept implementation.
|
||||||
@ -46,6 +50,8 @@
|
|||||||
#include <linux/signal.h>
|
#include <linux/signal.h>
|
||||||
#include <asm/futex.h>
|
#include <asm/futex.h>
|
||||||
|
|
||||||
|
#include "rtmutex_common.h"
|
||||||
|
|
||||||
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
|
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -74,6 +80,27 @@ union futex_key {
|
|||||||
} both;
|
} both;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Priority Inheritance state: shared by all waiters queued on one PI futex (see refcount).
|
||||||
|
*/
|
||||||
|
struct futex_pi_state {
|
||||||
|
/*
|
||||||
|
* list of 'owned' pi_state instances - these have to be
|
||||||
|
* cleaned up in do_exit() if the task exits prematurely (linked into owner->pi_state_list):
|
||||||
|
*/
|
||||||
|
struct list_head list;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The PI object (set up proxy-locked on behalf of the owner in lookup_pi_state()):
|
||||||
|
*/
|
||||||
|
struct rt_mutex pi_mutex;
|
||||||
|
|
||||||
|
struct task_struct *owner; /* task whose pi_state_list holds this entry; NULL once detached */
|
||||||
|
atomic_t refcount; /* starts at 1, bumped for each further waiter, dropped in free_pi_state() */
|
||||||
|
|
||||||
|
union futex_key key; /* copy of the futex key, used to find the hash bucket at exit cleanup */
|
||||||
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We use this hashed waitqueue instead of a normal wait_queue_t, so
|
* We use this hashed waitqueue instead of a normal wait_queue_t, so
|
||||||
* we can wake only the relevant ones (hashed queues may be shared).
|
* we can wake only the relevant ones (hashed queues may be shared).
|
||||||
@ -96,6 +123,10 @@ struct futex_q {
|
|||||||
/* For fd, sigio sent using these: */
|
/* For fd, sigio sent using these: */
|
||||||
int fd;
|
int fd;
|
||||||
struct file *filp;
|
struct file *filp;
|
||||||
|
|
||||||
|
/* Optional priority inheritance state: */
|
||||||
|
struct futex_pi_state *pi_state;
|
||||||
|
struct task_struct *task;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -258,6 +289,232 @@ static inline int get_futex_value_locked(u32 *dest, u32 __user *from)
|
|||||||
return ret ? -EFAULT : 0;
|
return ret ? -EFAULT : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Fault handling. Called with current->mm->mmap_sem held.
|
||||||
|
*/
|
||||||
|
static int futex_handle_fault(unsigned long address, int attempt)
|
||||||
|
{
|
||||||
|
struct vm_area_struct * vma;
|
||||||
|
struct mm_struct *mm = current->mm;
|
||||||
|
|
||||||
|
if (attempt >= 2 || !(vma = find_vma(mm, address)) ||
|
||||||
|
vma->vm_start > address || !(vma->vm_flags & VM_WRITE))
|
||||||
|
return -EFAULT;
|
||||||
|
|
||||||
|
switch (handle_mm_fault(mm, vma, address, 1)) {
|
||||||
|
case VM_FAULT_MINOR:
|
||||||
|
current->min_flt++;
|
||||||
|
break;
|
||||||
|
case VM_FAULT_MAJOR:
|
||||||
|
current->maj_flt++;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return -EFAULT;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* PI code:
|
||||||
|
*/
|
||||||
|
static int refill_pi_state_cache(void)
|
||||||
|
{
|
||||||
|
struct futex_pi_state *pi_state;
|
||||||
|
|
||||||
|
if (likely(current->pi_state_cache))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
pi_state = kmalloc(sizeof(*pi_state), GFP_KERNEL);
|
||||||
|
|
||||||
|
if (!pi_state)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
memset(pi_state, 0, sizeof(*pi_state));
|
||||||
|
INIT_LIST_HEAD(&pi_state->list);
|
||||||
|
/* pi_mutex gets initialized later */
|
||||||
|
pi_state->owner = NULL;
|
||||||
|
atomic_set(&pi_state->refcount, 1);
|
||||||
|
|
||||||
|
current->pi_state_cache = pi_state;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct futex_pi_state * alloc_pi_state(void)
|
||||||
|
{
|
||||||
|
struct futex_pi_state *pi_state = current->pi_state_cache;
|
||||||
|
|
||||||
|
WARN_ON(!pi_state);
|
||||||
|
current->pi_state_cache = NULL;
|
||||||
|
|
||||||
|
return pi_state;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void free_pi_state(struct futex_pi_state *pi_state)
|
||||||
|
{
|
||||||
|
if (!atomic_dec_and_test(&pi_state->refcount))
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If pi_state->owner is NULL, the owner is most probably dying
|
||||||
|
* and has cleaned up the pi_state already
|
||||||
|
*/
|
||||||
|
if (pi_state->owner) {
|
||||||
|
spin_lock_irq(&pi_state->owner->pi_lock);
|
||||||
|
list_del_init(&pi_state->list);
|
||||||
|
spin_unlock_irq(&pi_state->owner->pi_lock);
|
||||||
|
|
||||||
|
rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (current->pi_state_cache)
|
||||||
|
kfree(pi_state);
|
||||||
|
else {
|
||||||
|
/*
|
||||||
|
* pi_state->list is already empty.
|
||||||
|
* clear pi_state->owner.
|
||||||
|
* refcount is at 0 - put it back to 1.
|
||||||
|
*/
|
||||||
|
pi_state->owner = NULL;
|
||||||
|
atomic_set(&pi_state->refcount, 1);
|
||||||
|
current->pi_state_cache = pi_state;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Look up the task based on what TID userspace gave us.
|
||||||
|
* We dont trust it.
|
||||||
|
*/
|
||||||
|
static struct task_struct * futex_find_get_task(pid_t pid)
|
||||||
|
{
|
||||||
|
struct task_struct *p;
|
||||||
|
|
||||||
|
read_lock(&tasklist_lock);
|
||||||
|
p = find_task_by_pid(pid);
|
||||||
|
if (!p)
|
||||||
|
goto out_unlock;
|
||||||
|
if ((current->euid != p->euid) && (current->euid != p->uid)) {
|
||||||
|
p = NULL;
|
||||||
|
goto out_unlock;
|
||||||
|
}
|
||||||
|
if (p->state == EXIT_ZOMBIE || p->exit_state == EXIT_ZOMBIE) {
|
||||||
|
p = NULL;
|
||||||
|
goto out_unlock;
|
||||||
|
}
|
||||||
|
get_task_struct(p);
|
||||||
|
out_unlock:
|
||||||
|
read_unlock(&tasklist_lock);
|
||||||
|
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This task is holding PI mutexes at exit time => bad.
|
||||||
|
* Kernel cleans up PI-state, but userspace is likely hosed.
|
||||||
|
* (Robust-futex cleanup is separate and might save the day for userspace.)
|
||||||
|
*/
|
||||||
|
void exit_pi_state_list(struct task_struct *curr)
|
||||||
|
{
|
||||||
|
struct futex_hash_bucket *hb;
|
||||||
|
struct list_head *next, *head = &curr->pi_state_list;
|
||||||
|
struct futex_pi_state *pi_state;
|
||||||
|
union futex_key key;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We are a ZOMBIE and nobody can enqueue itself on
|
||||||
|
* pi_state_list anymore, but we have to be careful
|
||||||
|
* versus waiters unqueueing themselfs
|
||||||
|
*/
|
||||||
|
spin_lock_irq(&curr->pi_lock);
|
||||||
|
while (!list_empty(head)) {
|
||||||
|
|
||||||
|
next = head->next;
|
||||||
|
pi_state = list_entry(next, struct futex_pi_state, list);
|
||||||
|
key = pi_state->key;
|
||||||
|
spin_unlock_irq(&curr->pi_lock);
|
||||||
|
|
||||||
|
hb = hash_futex(&key);
|
||||||
|
spin_lock(&hb->lock);
|
||||||
|
|
||||||
|
spin_lock_irq(&curr->pi_lock);
|
||||||
|
if (head->next != next) {
|
||||||
|
spin_unlock(&hb->lock);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
list_del_init(&pi_state->list);
|
||||||
|
|
||||||
|
WARN_ON(pi_state->owner != curr);
|
||||||
|
|
||||||
|
pi_state->owner = NULL;
|
||||||
|
spin_unlock_irq(&curr->pi_lock);
|
||||||
|
|
||||||
|
rt_mutex_unlock(&pi_state->pi_mutex);
|
||||||
|
|
||||||
|
spin_unlock(&hb->lock);
|
||||||
|
|
||||||
|
spin_lock_irq(&curr->pi_lock);
|
||||||
|
}
|
||||||
|
spin_unlock_irq(&curr->pi_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
|
||||||
|
{
|
||||||
|
struct futex_pi_state *pi_state = NULL;
|
||||||
|
struct futex_q *this, *next;
|
||||||
|
struct list_head *head;
|
||||||
|
struct task_struct *p;
|
||||||
|
pid_t pid;
|
||||||
|
|
||||||
|
head = &hb->chain;
|
||||||
|
|
||||||
|
list_for_each_entry_safe(this, next, head, list) {
|
||||||
|
if (match_futex (&this->key, &me->key)) {
|
||||||
|
/*
|
||||||
|
* Another waiter already exists - bump up
|
||||||
|
* the refcount and return its pi_state:
|
||||||
|
*/
|
||||||
|
pi_state = this->pi_state;
|
||||||
|
atomic_inc(&pi_state->refcount);
|
||||||
|
me->pi_state = pi_state;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We are the first waiter - try to look up the real owner and
|
||||||
|
* attach the new pi_state to it:
|
||||||
|
*/
|
||||||
|
pid = uval & FUTEX_TID_MASK;
|
||||||
|
p = futex_find_get_task(pid);
|
||||||
|
if (!p)
|
||||||
|
return -ESRCH;
|
||||||
|
|
||||||
|
pi_state = alloc_pi_state();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initialize the pi_mutex in locked state and make 'p'
|
||||||
|
* the owner of it:
|
||||||
|
*/
|
||||||
|
rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
|
||||||
|
|
||||||
|
/* Store the key for possible exit cleanups: */
|
||||||
|
pi_state->key = me->key;
|
||||||
|
|
||||||
|
spin_lock_irq(&p->pi_lock);
|
||||||
|
list_add(&pi_state->list, &p->pi_state_list);
|
||||||
|
pi_state->owner = p;
|
||||||
|
spin_unlock_irq(&p->pi_lock);
|
||||||
|
|
||||||
|
put_task_struct(p);
|
||||||
|
|
||||||
|
me->pi_state = pi_state;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The hash bucket lock must be held when this is called.
|
* The hash bucket lock must be held when this is called.
|
||||||
* Afterwards, the futex_q must not be accessed.
|
* Afterwards, the futex_q must not be accessed.
|
||||||
@ -285,6 +542,70 @@ static void wake_futex(struct futex_q *q)
|
|||||||
q->lock_ptr = NULL;
|
q->lock_ptr = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Hand a PI futex over to its next owner.
 *
 * @uaddr: user address of the futex word
 * @uval:  value the futex word is expected to hold
 * @this:  a waiter queued on the futex; supplies the pi_state
 *
 * Writes the new owner's TID into the futex word, moves the pi_state from
 * the old owner's pi_state_list to the new owner's and unlocks the
 * rt_mutex.
 *
 * Returns 0 on success, -EINVAL when @this has no PI state or the futex
 * word no longer holds @uval, -EFAULT when the user access faulted.
 */
static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
{
	struct task_struct *new_owner;
	struct futex_pi_state *pi_state = this->pi_state;
	u32 curval, newval;

	if (!pi_state)
		return -EINVAL;

	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);

	/*
	 * This happens when we have stolen the lock and the original
	 * pending owner did not enqueue itself back on the rt_mutex.
	 * That's not a tragedy. We know that way, that a lock waiter
	 * is on the fly. We make the futex_q waiter the pending owner.
	 */
	if (!new_owner)
		new_owner = this->task;

	/*
	 * We pass it to the next owner. (The WAITERS bit is always
	 * kept enabled while there is PI state around. We must also
	 * preserve the owner died bit.)
	 */
	newval = (uval & FUTEX_OWNER_DIED) | FUTEX_WAITERS | new_owner->pid;

	inc_preempt_count();
	curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
	dec_preempt_count();

	if (curval == -EFAULT)
		return -EFAULT;
	if (curval != uval)
		return -EINVAL;

	/*
	 * Unlink this pi_state (not the owner's list head) from the old
	 * owner, under the old owner's pi_lock like every other
	 * pi_state->list manipulation in this file.
	 */
	spin_lock_irq(&pi_state->owner->pi_lock);
	list_del_init(&pi_state->list);
	spin_unlock_irq(&pi_state->owner->pi_lock);

	spin_lock_irq(&new_owner->pi_lock);
	list_add(&pi_state->list, &new_owner->pi_state_list);
	pi_state->owner = new_owner;
	spin_unlock_irq(&new_owner->pi_lock);

	rt_mutex_unlock(&pi_state->pi_mutex);

	return 0;
}
|
||||||
|
|
||||||
|
/*
 * Release a PI futex that has no waiters by atomically switching the
 * futex word from @uval to 0.
 *
 * Returns 0 on success, -EFAULT when the user access faulted, -EAGAIN
 * when the futex word did not hold @uval.
 */
static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
{
	u32 seen;

	/*
	 * There is no waiter, so we unlock the futex. The owner died
	 * bit does not have to be preserved here. We are the owner:
	 */
	inc_preempt_count();
	seen = futex_atomic_cmpxchg_inatomic(uaddr, uval, 0);
	dec_preempt_count();

	if (seen == -EFAULT)
		return -EFAULT;

	return (seen == uval) ? 0 : -EAGAIN;
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Wake up all waiters hashed on the physical page that is mapped
|
* Wake up all waiters hashed on the physical page that is mapped
|
||||||
* to this virtual address:
|
* to this virtual address:
|
||||||
@ -309,6 +630,8 @@ static int futex_wake(u32 __user *uaddr, int nr_wake)
|
|||||||
|
|
||||||
list_for_each_entry_safe(this, next, head, list) {
|
list_for_each_entry_safe(this, next, head, list) {
|
||||||
if (match_futex (&this->key, &key)) {
|
if (match_futex (&this->key, &key)) {
|
||||||
|
if (this->pi_state)
|
||||||
|
return -EINVAL;
|
||||||
wake_futex(this);
|
wake_futex(this);
|
||||||
if (++ret >= nr_wake)
|
if (++ret >= nr_wake)
|
||||||
break;
|
break;
|
||||||
@ -385,27 +708,9 @@ retry:
|
|||||||
* still holding the mmap_sem.
|
* still holding the mmap_sem.
|
||||||
*/
|
*/
|
||||||
if (attempt++) {
|
if (attempt++) {
|
||||||
struct vm_area_struct * vma;
|
if (futex_handle_fault((unsigned long)uaddr2,
|
||||||
struct mm_struct *mm = current->mm;
|
attempt))
|
||||||
unsigned long address = (unsigned long)uaddr2;
|
|
||||||
|
|
||||||
ret = -EFAULT;
|
|
||||||
if (attempt >= 2 ||
|
|
||||||
!(vma = find_vma(mm, address)) ||
|
|
||||||
vma->vm_start > address ||
|
|
||||||
!(vma->vm_flags & VM_WRITE))
|
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
switch (handle_mm_fault(mm, vma, address, 1)) {
|
|
||||||
case VM_FAULT_MINOR:
|
|
||||||
current->min_flt++;
|
|
||||||
break;
|
|
||||||
case VM_FAULT_MAJOR:
|
|
||||||
current->maj_flt++;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -572,6 +877,7 @@ queue_lock(struct futex_q *q, int fd, struct file *filp)
|
|||||||
static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
|
static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
|
||||||
{
|
{
|
||||||
list_add_tail(&q->list, &hb->chain);
|
list_add_tail(&q->list, &hb->chain);
|
||||||
|
q->task = current;
|
||||||
spin_unlock(&hb->lock);
|
spin_unlock(&hb->lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -626,6 +932,9 @@ static int unqueue_me(struct futex_q *q)
|
|||||||
}
|
}
|
||||||
WARN_ON(list_empty(&q->list));
|
WARN_ON(list_empty(&q->list));
|
||||||
list_del(&q->list);
|
list_del(&q->list);
|
||||||
|
|
||||||
|
BUG_ON(q->pi_state);
|
||||||
|
|
||||||
spin_unlock(lock_ptr);
|
spin_unlock(lock_ptr);
|
||||||
ret = 1;
|
ret = 1;
|
||||||
}
|
}
|
||||||
@ -634,16 +943,36 @@ static int unqueue_me(struct futex_q *q)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* PI futexes can not be requeued and must remove themself from the
|
||||||
|
* hash bucket. The hash bucket lock is held on entry and dropped here.
|
||||||
|
*/
|
||||||
|
static void unqueue_me_pi(struct futex_q *q, struct futex_hash_bucket *hb)
|
||||||
|
{
|
||||||
|
WARN_ON(list_empty(&q->list));
|
||||||
|
list_del(&q->list);
|
||||||
|
|
||||||
|
BUG_ON(!q->pi_state);
|
||||||
|
free_pi_state(q->pi_state);
|
||||||
|
q->pi_state = NULL;
|
||||||
|
|
||||||
|
spin_unlock(&hb->lock);
|
||||||
|
|
||||||
|
drop_key_refs(&q->key);
|
||||||
|
}
|
||||||
|
|
||||||
static int futex_wait(u32 __user *uaddr, u32 val, unsigned long time)
|
static int futex_wait(u32 __user *uaddr, u32 val, unsigned long time)
|
||||||
{
|
{
|
||||||
DECLARE_WAITQUEUE(wait, current);
|
struct task_struct *curr = current;
|
||||||
|
DECLARE_WAITQUEUE(wait, curr);
|
||||||
struct futex_hash_bucket *hb;
|
struct futex_hash_bucket *hb;
|
||||||
struct futex_q q;
|
struct futex_q q;
|
||||||
u32 uval;
|
u32 uval;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
q.pi_state = NULL;
|
||||||
retry:
|
retry:
|
||||||
down_read(&current->mm->mmap_sem);
|
down_read(&curr->mm->mmap_sem);
|
||||||
|
|
||||||
ret = get_futex_key(uaddr, &q.key);
|
ret = get_futex_key(uaddr, &q.key);
|
||||||
if (unlikely(ret != 0))
|
if (unlikely(ret != 0))
|
||||||
@ -680,7 +1009,7 @@ static int futex_wait(u32 __user *uaddr, u32 val, unsigned long time)
|
|||||||
* If we would have faulted, release mmap_sem, fault it in and
|
* If we would have faulted, release mmap_sem, fault it in and
|
||||||
* start all over again.
|
* start all over again.
|
||||||
*/
|
*/
|
||||||
up_read(&current->mm->mmap_sem);
|
up_read(&curr->mm->mmap_sem);
|
||||||
|
|
||||||
ret = get_user(uval, uaddr);
|
ret = get_user(uval, uaddr);
|
||||||
|
|
||||||
@ -688,11 +1017,9 @@ static int futex_wait(u32 __user *uaddr, u32 val, unsigned long time)
|
|||||||
goto retry;
|
goto retry;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
if (uval != val) {
|
|
||||||
ret = -EWOULDBLOCK;
|
ret = -EWOULDBLOCK;
|
||||||
queue_unlock(&q, hb);
|
if (uval != val)
|
||||||
goto out_release_sem;
|
goto out_unlock_release_sem;
|
||||||
}
|
|
||||||
|
|
||||||
/* Only actually queue if *uaddr contained val. */
|
/* Only actually queue if *uaddr contained val. */
|
||||||
__queue_me(&q, hb);
|
__queue_me(&q, hb);
|
||||||
@ -701,7 +1028,7 @@ static int futex_wait(u32 __user *uaddr, u32 val, unsigned long time)
|
|||||||
* Now the futex is queued and we have checked the data, we
|
* Now the futex is queued and we have checked the data, we
|
||||||
* don't want to hold mmap_sem while we sleep.
|
* don't want to hold mmap_sem while we sleep.
|
||||||
*/
|
*/
|
||||||
up_read(&current->mm->mmap_sem);
|
up_read(&curr->mm->mmap_sem);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* There might have been scheduling since the queue_me(), as we
|
* There might have been scheduling since the queue_me(), as we
|
||||||
@ -739,8 +1066,415 @@ static int futex_wait(u32 __user *uaddr, u32 val, unsigned long time)
|
|||||||
*/
|
*/
|
||||||
return -EINTR;
|
return -EINTR;
|
||||||
|
|
||||||
|
out_unlock_release_sem:
|
||||||
|
queue_unlock(&q, hb);
|
||||||
|
|
||||||
out_release_sem:
|
out_release_sem:
|
||||||
|
up_read(&curr->mm->mmap_sem);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Userspace tried a 0 -> TID atomic transition of the futex value
|
||||||
|
* and failed. The kernel side here does the whole locking operation:
|
||||||
|
* if there are waiters then it will block, it does PI, etc. (Due to
|
||||||
|
* races the kernel might see a 0 value of the futex too.)
|
||||||
|
*/
|
||||||
|
static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
|
||||||
|
struct hrtimer_sleeper *to)
|
||||||
|
{
|
||||||
|
struct task_struct *curr = current;
|
||||||
|
struct futex_hash_bucket *hb;
|
||||||
|
u32 uval, newval, curval;
|
||||||
|
struct futex_q q;
|
||||||
|
int ret, attempt = 0;
|
||||||
|
|
||||||
|
if (refill_pi_state_cache())
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
q.pi_state = NULL;
|
||||||
|
retry:
|
||||||
|
down_read(&curr->mm->mmap_sem);
|
||||||
|
|
||||||
|
ret = get_futex_key(uaddr, &q.key);
|
||||||
|
if (unlikely(ret != 0))
|
||||||
|
goto out_release_sem;
|
||||||
|
|
||||||
|
hb = queue_lock(&q, -1, NULL);
|
||||||
|
|
||||||
|
retry_locked:
|
||||||
|
/*
|
||||||
|
* To avoid races, we attempt to take the lock here again
|
||||||
|
* (by doing a 0 -> TID atomic cmpxchg), while holding all
|
||||||
|
* the locks. It will most likely not succeed.
|
||||||
|
*/
|
||||||
|
newval = current->pid;
|
||||||
|
|
||||||
|
inc_preempt_count();
|
||||||
|
curval = futex_atomic_cmpxchg_inatomic(uaddr, 0, newval);
|
||||||
|
dec_preempt_count();
|
||||||
|
|
||||||
|
if (unlikely(curval == -EFAULT))
|
||||||
|
goto uaddr_faulted;
|
||||||
|
|
||||||
|
/* We own the lock already */
|
||||||
|
if (unlikely((curval & FUTEX_TID_MASK) == current->pid)) {
|
||||||
|
if (!detect && 0)
|
||||||
|
force_sig(SIGKILL, current);
|
||||||
|
ret = -EDEADLK;
|
||||||
|
goto out_unlock_release_sem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Surprise - we got the lock. Just return
|
||||||
|
* to userspace:
|
||||||
|
*/
|
||||||
|
if (unlikely(!curval))
|
||||||
|
goto out_unlock_release_sem;
|
||||||
|
|
||||||
|
uval = curval;
|
||||||
|
newval = uval | FUTEX_WAITERS;
|
||||||
|
|
||||||
|
inc_preempt_count();
|
||||||
|
curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
|
||||||
|
dec_preempt_count();
|
||||||
|
|
||||||
|
if (unlikely(curval == -EFAULT))
|
||||||
|
goto uaddr_faulted;
|
||||||
|
if (unlikely(curval != uval))
|
||||||
|
goto retry_locked;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We dont have the lock. Look up the PI state (or create it if
|
||||||
|
* we are the first waiter):
|
||||||
|
*/
|
||||||
|
ret = lookup_pi_state(uval, hb, &q);
|
||||||
|
|
||||||
|
if (unlikely(ret)) {
|
||||||
|
/*
|
||||||
|
* There were no waiters and the owner task lookup
|
||||||
|
* failed. When the OWNER_DIED bit is set, then we
|
||||||
|
* know that this is a robust futex and we actually
|
||||||
|
* take the lock. This is safe as we are protected by
|
||||||
|
* the hash bucket lock. We also set the waiters bit
|
||||||
|
* unconditionally here, to simplify glibc handling of
|
||||||
|
* multiple tasks racing to acquire the lock and
|
||||||
|
* cleanup the problems which were left by the dead
|
||||||
|
* owner.
|
||||||
|
*/
|
||||||
|
if (curval & FUTEX_OWNER_DIED) {
|
||||||
|
uval = newval;
|
||||||
|
newval = current->pid |
|
||||||
|
FUTEX_OWNER_DIED | FUTEX_WAITERS;
|
||||||
|
|
||||||
|
inc_preempt_count();
|
||||||
|
curval = futex_atomic_cmpxchg_inatomic(uaddr,
|
||||||
|
uval, newval);
|
||||||
|
dec_preempt_count();
|
||||||
|
|
||||||
|
if (unlikely(curval == -EFAULT))
|
||||||
|
goto uaddr_faulted;
|
||||||
|
if (unlikely(curval != uval))
|
||||||
|
goto retry_locked;
|
||||||
|
ret = 0;
|
||||||
|
}
|
||||||
|
goto out_unlock_release_sem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Only actually queue now that the atomic ops are done:
|
||||||
|
*/
|
||||||
|
__queue_me(&q, hb);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Now the futex is queued and we have checked the data, we
|
||||||
|
* don't want to hold mmap_sem while we sleep.
|
||||||
|
*/
|
||||||
|
up_read(&curr->mm->mmap_sem);
|
||||||
|
|
||||||
|
WARN_ON(!q.pi_state);
|
||||||
|
/*
|
||||||
|
* Block on the PI mutex:
|
||||||
|
*/
|
||||||
|
if (!trylock)
|
||||||
|
ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1);
|
||||||
|
else {
|
||||||
|
ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
|
||||||
|
/* Fixup the trylock return value: */
|
||||||
|
ret = ret ? 0 : -EWOULDBLOCK;
|
||||||
|
}
|
||||||
|
|
||||||
|
down_read(&curr->mm->mmap_sem);
|
||||||
|
hb = queue_lock(&q, -1, NULL);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Got the lock. We might not be the anticipated owner if we
|
||||||
|
* did a lock-steal - fix up the PI-state in that case.
|
||||||
|
*/
|
||||||
|
if (!ret && q.pi_state->owner != curr) {
|
||||||
|
u32 newtid = current->pid | FUTEX_WAITERS;
|
||||||
|
|
||||||
|
/* Owner died? */
|
||||||
|
if (q.pi_state->owner != NULL) {
|
||||||
|
spin_lock_irq(&q.pi_state->owner->pi_lock);
|
||||||
|
list_del_init(&q.pi_state->list);
|
||||||
|
spin_unlock_irq(&q.pi_state->owner->pi_lock);
|
||||||
|
} else
|
||||||
|
newtid |= FUTEX_OWNER_DIED;
|
||||||
|
|
||||||
|
q.pi_state->owner = current;
|
||||||
|
|
||||||
|
spin_lock_irq(¤t->pi_lock);
|
||||||
|
list_add(&q.pi_state->list, ¤t->pi_state_list);
|
||||||
|
spin_unlock_irq(¤t->pi_lock);
|
||||||
|
|
||||||
|
/* Unqueue and drop the lock */
|
||||||
|
unqueue_me_pi(&q, hb);
|
||||||
|
up_read(&curr->mm->mmap_sem);
|
||||||
|
/*
|
||||||
|
* We own it, so we have to replace the pending owner
|
||||||
|
* TID. This must be atomic as we have preserve the
|
||||||
|
* owner died bit here.
|
||||||
|
*/
|
||||||
|
ret = get_user(uval, uaddr);
|
||||||
|
while (!ret) {
|
||||||
|
newval = (uval & FUTEX_OWNER_DIED) | newtid;
|
||||||
|
curval = futex_atomic_cmpxchg_inatomic(uaddr,
|
||||||
|
uval, newval);
|
||||||
|
if (curval == -EFAULT)
|
||||||
|
ret = -EFAULT;
|
||||||
|
if (curval == uval)
|
||||||
|
break;
|
||||||
|
uval = curval;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* Catch the rare case, where the lock was released
|
||||||
|
* when we were on the way back before we locked
|
||||||
|
* the hash bucket.
|
||||||
|
*/
|
||||||
|
if (ret && q.pi_state->owner == curr) {
|
||||||
|
if (rt_mutex_trylock(&q.pi_state->pi_mutex))
|
||||||
|
ret = 0;
|
||||||
|
}
|
||||||
|
/* Unqueue and drop the lock */
|
||||||
|
unqueue_me_pi(&q, hb);
|
||||||
|
up_read(&curr->mm->mmap_sem);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!detect && ret == -EDEADLK && 0)
|
||||||
|
force_sig(SIGKILL, current);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
out_unlock_release_sem:
|
||||||
|
queue_unlock(&q, hb);
|
||||||
|
|
||||||
|
out_release_sem:
|
||||||
|
up_read(&curr->mm->mmap_sem);
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
uaddr_faulted:
|
||||||
|
/*
|
||||||
|
* We have to r/w *(int __user *)uaddr, but we can't modify it
|
||||||
|
* non-atomically. Therefore, if get_user below is not
|
||||||
|
* enough, we need to handle the fault ourselves, while
|
||||||
|
* still holding the mmap_sem.
|
||||||
|
*/
|
||||||
|
if (attempt++) {
|
||||||
|
if (futex_handle_fault((unsigned long)uaddr, attempt))
|
||||||
|
goto out_unlock_release_sem;
|
||||||
|
|
||||||
|
goto retry_locked;
|
||||||
|
}
|
||||||
|
|
||||||
|
queue_unlock(&q, hb);
|
||||||
|
up_read(&curr->mm->mmap_sem);
|
||||||
|
|
||||||
|
ret = get_user(uval, uaddr);
|
||||||
|
if (!ret && (uval != -EFAULT))
|
||||||
|
goto retry;
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Restart handler
|
||||||
|
*/
|
||||||
|
static long futex_lock_pi_restart(struct restart_block *restart)
|
||||||
|
{
|
||||||
|
struct hrtimer_sleeper timeout, *to = NULL;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
restart->fn = do_no_restart_syscall;
|
||||||
|
|
||||||
|
if (restart->arg2 || restart->arg3) {
|
||||||
|
to = &timeout;
|
||||||
|
hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_ABS);
|
||||||
|
hrtimer_init_sleeper(to, current);
|
||||||
|
to->timer.expires.tv64 = ((u64)restart->arg1 << 32) |
|
||||||
|
(u64) restart->arg0;
|
||||||
|
}
|
||||||
|
|
||||||
|
pr_debug("lock_pi restart: %p, %d (%d)\n",
|
||||||
|
(u32 __user *)restart->arg0, current->pid);
|
||||||
|
|
||||||
|
ret = do_futex_lock_pi((u32 __user *)restart->arg0, restart->arg1,
|
||||||
|
0, to);
|
||||||
|
|
||||||
|
if (ret != -EINTR)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
restart->fn = futex_lock_pi_restart;
|
||||||
|
|
||||||
|
/* The other values are filled in */
|
||||||
|
return -ERESTART_RESTARTBLOCK;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Called from the syscall entry below.
|
||||||
|
*/
|
||||||
|
static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
|
||||||
|
long nsec, int trylock)
|
||||||
|
{
|
||||||
|
struct hrtimer_sleeper timeout, *to = NULL;
|
||||||
|
struct restart_block *restart;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (sec != MAX_SCHEDULE_TIMEOUT) {
|
||||||
|
to = &timeout;
|
||||||
|
hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_ABS);
|
||||||
|
hrtimer_init_sleeper(to, current);
|
||||||
|
to->timer.expires = ktime_set(sec, nsec);
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = do_futex_lock_pi(uaddr, detect, trylock, to);
|
||||||
|
|
||||||
|
if (ret != -EINTR)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
pr_debug("lock_pi interrupted: %p, %d (%d)\n", uaddr, current->pid);
|
||||||
|
|
||||||
|
restart = ¤t_thread_info()->restart_block;
|
||||||
|
restart->fn = futex_lock_pi_restart;
|
||||||
|
restart->arg0 = (unsigned long) uaddr;
|
||||||
|
restart->arg1 = detect;
|
||||||
|
if (to) {
|
||||||
|
restart->arg2 = to->timer.expires.tv64 & 0xFFFFFFFF;
|
||||||
|
restart->arg3 = to->timer.expires.tv64 >> 32;
|
||||||
|
} else
|
||||||
|
restart->arg2 = restart->arg3 = 0;
|
||||||
|
|
||||||
|
return -ERESTART_RESTARTBLOCK;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Userspace attempted a TID -> 0 atomic transition, and failed.
|
||||||
|
* This is the in-kernel slowpath: we look up the PI state (if any),
|
||||||
|
* and do the rt-mutex unlock.
|
||||||
|
*/
|
||||||
|
static int futex_unlock_pi(u32 __user *uaddr)
|
||||||
|
{
|
||||||
|
struct futex_hash_bucket *hb;
|
||||||
|
struct futex_q *this, *next;
|
||||||
|
u32 uval;
|
||||||
|
struct list_head *head;
|
||||||
|
union futex_key key;
|
||||||
|
int ret, attempt = 0;
|
||||||
|
|
||||||
|
retry:
|
||||||
|
if (get_user(uval, uaddr))
|
||||||
|
return -EFAULT;
|
||||||
|
/*
|
||||||
|
* We release only a lock we actually own:
|
||||||
|
*/
|
||||||
|
if ((uval & FUTEX_TID_MASK) != current->pid)
|
||||||
|
return -EPERM;
|
||||||
|
/*
|
||||||
|
* First take all the futex related locks:
|
||||||
|
*/
|
||||||
|
down_read(¤t->mm->mmap_sem);
|
||||||
|
|
||||||
|
ret = get_futex_key(uaddr, &key);
|
||||||
|
if (unlikely(ret != 0))
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
hb = hash_futex(&key);
|
||||||
|
spin_lock(&hb->lock);
|
||||||
|
|
||||||
|
retry_locked:
|
||||||
|
/*
|
||||||
|
* To avoid races, try to do the TID -> 0 atomic transition
|
||||||
|
* again. If it succeeds then we can return without waking
|
||||||
|
* anyone else up:
|
||||||
|
*/
|
||||||
|
inc_preempt_count();
|
||||||
|
uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
|
||||||
|
dec_preempt_count();
|
||||||
|
|
||||||
|
if (unlikely(uval == -EFAULT))
|
||||||
|
goto pi_faulted;
|
||||||
|
/*
|
||||||
|
* Rare case: we managed to release the lock atomically,
|
||||||
|
* no need to wake anyone else up:
|
||||||
|
*/
|
||||||
|
if (unlikely(uval == current->pid))
|
||||||
|
goto out_unlock;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Ok, other tasks may need to be woken up - check waiters
|
||||||
|
* and do the wakeup if necessary:
|
||||||
|
*/
|
||||||
|
head = &hb->chain;
|
||||||
|
|
||||||
|
list_for_each_entry_safe(this, next, head, list) {
|
||||||
|
if (!match_futex (&this->key, &key))
|
||||||
|
continue;
|
||||||
|
ret = wake_futex_pi(uaddr, uval, this);
|
||||||
|
/*
|
||||||
|
* The atomic access to the futex value
|
||||||
|
* generated a pagefault, so retry the
|
||||||
|
* user-access and the wakeup:
|
||||||
|
*/
|
||||||
|
if (ret == -EFAULT)
|
||||||
|
goto pi_faulted;
|
||||||
|
goto out_unlock;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* No waiters - kernel unlocks the futex:
|
||||||
|
*/
|
||||||
|
ret = unlock_futex_pi(uaddr, uval);
|
||||||
|
if (ret == -EFAULT)
|
||||||
|
goto pi_faulted;
|
||||||
|
|
||||||
|
out_unlock:
|
||||||
|
spin_unlock(&hb->lock);
|
||||||
|
out:
|
||||||
up_read(¤t->mm->mmap_sem);
|
up_read(¤t->mm->mmap_sem);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
pi_faulted:
|
||||||
|
/*
|
||||||
|
* We have to r/w *(int __user *)uaddr, but we can't modify it
|
||||||
|
* non-atomically. Therefore, if get_user below is not
|
||||||
|
* enough, we need to handle the fault ourselves, while
|
||||||
|
* still holding the mmap_sem.
|
||||||
|
*/
|
||||||
|
if (attempt++) {
|
||||||
|
if (futex_handle_fault((unsigned long)uaddr, attempt))
|
||||||
|
goto out_unlock;
|
||||||
|
|
||||||
|
goto retry_locked;
|
||||||
|
}
|
||||||
|
|
||||||
|
spin_unlock(&hb->lock);
|
||||||
|
up_read(¤t->mm->mmap_sem);
|
||||||
|
|
||||||
|
ret = get_user(uval, uaddr);
|
||||||
|
if (!ret && (uval != -EFAULT))
|
||||||
|
goto retry;
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -819,6 +1553,7 @@ static int futex_fd(u32 __user *uaddr, int signal)
|
|||||||
err = -ENOMEM;
|
err = -ENOMEM;
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
q->pi_state = NULL;
|
||||||
|
|
||||||
down_read(&current->mm->mmap_sem);
|
down_read(&current->mm->mmap_sem);
|
||||||
err = get_futex_key(uaddr, &q->key);
|
err = get_futex_key(uaddr, &q->key);
|
||||||
@ -856,7 +1591,7 @@ error:
|
|||||||
* Implementation: user-space maintains a per-thread list of locks it
|
* Implementation: user-space maintains a per-thread list of locks it
|
||||||
* is holding. Upon do_exit(), the kernel carefully walks this list,
|
* is holding. Upon do_exit(), the kernel carefully walks this list,
|
||||||
* and marks all locks that are owned by this thread with the
|
* and marks all locks that are owned by this thread with the
|
||||||
* FUTEX_OWNER_DEAD bit, and wakes up a waiter (if any). The list is
|
* FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
|
||||||
* always manipulated with the lock held, so the list is private and
|
* always manipulated with the lock held, so the list is private and
|
||||||
* per-thread. Userspace also maintains a per-thread 'list_op_pending'
|
* per-thread. Userspace also maintains a per-thread 'list_op_pending'
|
||||||
* field, to allow the kernel to clean up if the thread dies after
|
* field, to allow the kernel to clean up if the thread dies after
|
||||||
@ -931,7 +1666,7 @@ err_unlock:
|
|||||||
*/
|
*/
|
||||||
int handle_futex_death(u32 __user *uaddr, struct task_struct *curr)
|
int handle_futex_death(u32 __user *uaddr, struct task_struct *curr)
|
||||||
{
|
{
|
||||||
u32 uval;
|
u32 uval, nval;
|
||||||
|
|
||||||
retry:
|
retry:
|
||||||
if (get_user(uval, uaddr))
|
if (get_user(uval, uaddr))
|
||||||
@ -948,8 +1683,12 @@ retry:
|
|||||||
* thread-death.) The rest of the cleanup is done in
|
* thread-death.) The rest of the cleanup is done in
|
||||||
* userspace.
|
* userspace.
|
||||||
*/
|
*/
|
||||||
if (futex_atomic_cmpxchg_inatomic(uaddr, uval,
|
nval = futex_atomic_cmpxchg_inatomic(uaddr, uval,
|
||||||
uval | FUTEX_OWNER_DIED) != uval)
|
uval | FUTEX_OWNER_DIED);
|
||||||
|
if (nval == -EFAULT)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
if (nval != uval)
|
||||||
goto retry;
|
goto retry;
|
||||||
|
|
||||||
if (uval & FUTEX_WAITERS)
|
if (uval & FUTEX_WAITERS)
|
||||||
@ -994,7 +1733,7 @@ void exit_robust_list(struct task_struct *curr)
|
|||||||
while (entry != &head->list) {
|
while (entry != &head->list) {
|
||||||
/*
|
/*
|
||||||
* A pending lock might already be on the list, so
|
* A pending lock might already be on the list, so
|
||||||
* dont process it twice:
|
* don't process it twice:
|
||||||
*/
|
*/
|
||||||
if (entry != pending)
|
if (entry != pending)
|
||||||
if (handle_futex_death((void *)entry + futex_offset,
|
if (handle_futex_death((void *)entry + futex_offset,
|
||||||
@ -1040,6 +1779,15 @@ long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout,
|
|||||||
case FUTEX_WAKE_OP:
|
case FUTEX_WAKE_OP:
|
||||||
ret = futex_wake_op(uaddr, uaddr2, val, val2, val3);
|
ret = futex_wake_op(uaddr, uaddr2, val, val2, val3);
|
||||||
break;
|
break;
|
||||||
|
case FUTEX_LOCK_PI:
|
||||||
|
ret = futex_lock_pi(uaddr, val, timeout, val2, 0);
|
||||||
|
break;
|
||||||
|
case FUTEX_UNLOCK_PI:
|
||||||
|
ret = futex_unlock_pi(uaddr);
|
||||||
|
break;
|
||||||
|
case FUTEX_TRYLOCK_PI:
|
||||||
|
ret = futex_lock_pi(uaddr, 0, timeout, val2, 1);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
ret = -ENOSYS;
|
ret = -ENOSYS;
|
||||||
}
|
}
|
||||||
@ -1055,17 +1803,22 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
|
|||||||
unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
|
unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
|
||||||
u32 val2 = 0;
|
u32 val2 = 0;
|
||||||
|
|
||||||
if (utime && (op == FUTEX_WAIT)) {
|
if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
|
||||||
if (copy_from_user(&t, utime, sizeof(t)) != 0)
|
if (copy_from_user(&t, utime, sizeof(t)) != 0)
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
if (!timespec_valid(&t))
|
if (!timespec_valid(&t))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
if (op == FUTEX_WAIT)
|
||||||
timeout = timespec_to_jiffies(&t) + 1;
|
timeout = timespec_to_jiffies(&t) + 1;
|
||||||
|
else {
|
||||||
|
timeout = t.tv_sec;
|
||||||
|
val2 = t.tv_nsec;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
* requeue parameter in 'utime' if op == FUTEX_REQUEUE.
|
* requeue parameter in 'utime' if op == FUTEX_REQUEUE.
|
||||||
*/
|
*/
|
||||||
if (op >= FUTEX_REQUEUE)
|
if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE)
|
||||||
val2 = (u32) (unsigned long) utime;
|
val2 = (u32) (unsigned long) utime;
|
||||||
|
|
||||||
return do_futex(uaddr, op, val, timeout, uaddr2, val2, val3);
|
return do_futex(uaddr, op, val, timeout, uaddr2, val2, val3);
|
||||||
|
@ -129,14 +129,19 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
|
|||||||
unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
|
unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
|
||||||
int val2 = 0;
|
int val2 = 0;
|
||||||
|
|
||||||
if (utime && (op == FUTEX_WAIT)) {
|
if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
|
||||||
if (get_compat_timespec(&t, utime))
|
if (get_compat_timespec(&t, utime))
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
if (!timespec_valid(&t))
|
if (!timespec_valid(&t))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
if (op == FUTEX_WAIT)
|
||||||
timeout = timespec_to_jiffies(&t) + 1;
|
timeout = timespec_to_jiffies(&t) + 1;
|
||||||
|
else {
|
||||||
|
timeout = t.tv_sec;
|
||||||
|
val2 = t.tv_nsec;
|
||||||
}
|
}
|
||||||
if (op >= FUTEX_REQUEUE)
|
}
|
||||||
|
if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE)
|
||||||
val2 = (int) (unsigned long) utime;
|
val2 = (int) (unsigned long) utime;
|
||||||
|
|
||||||
return do_futex(uaddr, op, val, timeout, uaddr2, val2, val3);
|
return do_futex(uaddr, op, val, timeout, uaddr2, val2, val3);
|
||||||
|
@ -112,4 +112,12 @@ static inline unsigned long rt_mutex_owner_pending(struct rt_mutex *lock)
|
|||||||
return (unsigned long)lock->owner & RT_MUTEX_OWNER_PENDING;
|
return (unsigned long)lock->owner & RT_MUTEX_OWNER_PENDING;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* PI-futex support (proxy locking functions, etc.):
|
||||||
|
*/
|
||||||
|
extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
|
||||||
|
extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
|
||||||
|
struct task_struct *proxy_owner);
|
||||||
|
extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
|
||||||
|
struct task_struct *proxy_owner);
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
Reference in New Issue
Block a user