task_work: Make task_work_add() lockless

Change task_work to use llist-like code to avoid taking pi_lock
in task_work_add(); this makes it usable under rq->lock.

task_work_cancel() and task_work_run() still use pi_lock
to synchronize with each other.

(This is in preparation for a deadlock fix.)

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20120826191209.GA4221@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Oleg Nesterov 2012-08-26 21:12:09 +02:00 committed by Ingo Molnar
parent 15674868d6
commit ac3d0da8f3

View File

@ -3,25 +3,18 @@
#include <linux/tracehook.h> #include <linux/tracehook.h>
int int
task_work_add(struct task_struct *task, struct callback_head *twork, bool notify) task_work_add(struct task_struct *task, struct callback_head *work, bool notify)
{ {
struct callback_head *last, *first; struct callback_head *head;
unsigned long flags;
/* /*
* Not inserting the new work if the task has already passed * Not inserting the new work if the task has already passed
* exit_task_work() is the responsibility of callers. * exit_task_work() is the responsibility of callers.
*/ */
raw_spin_lock_irqsave(&task->pi_lock, flags); do {
last = task->task_works; head = ACCESS_ONCE(task->task_works);
first = last ? last->next : twork; work->next = head;
twork->next = first; } while (cmpxchg(&task->task_works, head, work) != head);
if (last)
last->next = twork;
task->task_works = twork;
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
/* test_and_set_bit() implies mb(), see tracehook_notify_resume(). */
if (notify) if (notify)
set_notify_resume(task); set_notify_resume(task);
return 0; return 0;
@ -30,52 +23,60 @@ task_work_add(struct task_struct *task, struct callback_head *twork, bool notify
struct callback_head * struct callback_head *
task_work_cancel(struct task_struct *task, task_work_func_t func) task_work_cancel(struct task_struct *task, task_work_func_t func)
{ {
struct callback_head **pprev = &task->task_works;
struct callback_head *work = NULL;
unsigned long flags; unsigned long flags;
struct callback_head *last, *res = NULL; /*
* If cmpxchg() fails we continue without updating pprev.
* Either we raced with task_work_add() which added the
* new entry before this work, we will find it again. Or
* we raced with task_work_run(), *pprev == NULL.
*/
raw_spin_lock_irqsave(&task->pi_lock, flags); raw_spin_lock_irqsave(&task->pi_lock, flags);
last = task->task_works; while ((work = ACCESS_ONCE(*pprev))) {
if (last) { read_barrier_depends();
struct callback_head *q = last, *p = q->next; if (work->func != func)
while (1) { pprev = &work->next;
if (p->func == func) { else if (cmpxchg(pprev, work, work->next) == work)
q->next = p->next; break;
if (p == last)
task->task_works = q == p ? NULL : q;
res = p;
break;
}
if (p == last)
break;
q = p;
p = q->next;
}
} }
raw_spin_unlock_irqrestore(&task->pi_lock, flags); raw_spin_unlock_irqrestore(&task->pi_lock, flags);
return res;
return work;
} }
void task_work_run(void) void task_work_run(void)
{ {
struct task_struct *task = current; struct task_struct *task = current;
struct callback_head *p, *q; struct callback_head *work, *head, *next;
while (1) { for (;;) {
raw_spin_lock_irq(&task->pi_lock); work = xchg(&task->task_works, NULL);
p = task->task_works; if (!work)
task->task_works = NULL; break;
raw_spin_unlock_irq(&task->pi_lock); /*
* Synchronize with task_work_cancel(). It can't remove
* the first entry == work, cmpxchg(task_works) should
* fail, but it can play with *work and other entries.
*/
raw_spin_unlock_wait(&task->pi_lock);
smp_mb();
if (unlikely(!p)) /* Reverse the list to run the works in fifo order */
return; head = NULL;
do {
next = work->next;
work->next = head;
head = work;
work = next;
} while (work);
q = p->next; /* head */ work = head;
p->next = NULL; /* cut it */ do {
while (q) { next = work->next;
p = q->next; work->func(work);
q->func(q); work = next;
q = p;
cond_resched(); cond_resched();
} } while (work);
} }
} }