Merge branch 'core-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'core-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  futex: Move drop_futex_key_refs out of spinlock'ed region
  rcu: Fix TREE_PREEMPT_RCU CPU_HOTPLUG bad-luck hang
  rcu: Stopgap fix for synchronize_rcu_expedited() for TREE_PREEMPT_RCU
  rcu: Prevent RCU IPI storms in presence of high call_rcu() load
  futex: Check for NULL keys in match_futex
  futex: Handle spurious wake up
This commit is contained in:
Linus Torvalds 2009-10-29 08:12:20 -07:00
commit fefcfd431b
5 changed files with 103 additions and 27 deletions

View File

@ -76,11 +76,7 @@ static inline void __rcu_read_unlock_bh(void)
extern void call_rcu_sched(struct rcu_head *head, extern void call_rcu_sched(struct rcu_head *head,
void (*func)(struct rcu_head *rcu)); void (*func)(struct rcu_head *rcu));
extern void synchronize_rcu_expedited(void);
static inline void synchronize_rcu_expedited(void)
{
synchronize_sched_expedited();
}
static inline void synchronize_rcu_bh_expedited(void) static inline void synchronize_rcu_bh_expedited(void)
{ {

View File

@ -150,7 +150,8 @@ static struct futex_hash_bucket *hash_futex(union futex_key *key)
*/ */
static inline int match_futex(union futex_key *key1, union futex_key *key2) static inline int match_futex(union futex_key *key1, union futex_key *key2)
{ {
return (key1->both.word == key2->both.word return (key1 && key2
&& key1->both.word == key2->both.word
&& key1->both.ptr == key2->both.ptr && key1->both.ptr == key2->both.ptr
&& key1->both.offset == key2->both.offset); && key1->both.offset == key2->both.offset);
} }
@ -1028,7 +1029,6 @@ static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
struct futex_hash_bucket *hb) struct futex_hash_bucket *hb)
{ {
drop_futex_key_refs(&q->key);
get_futex_key_refs(key); get_futex_key_refs(key);
q->key = *key; q->key = *key;
@ -1226,6 +1226,7 @@ retry_private:
*/ */
if (ret == 1) { if (ret == 1) {
WARN_ON(pi_state); WARN_ON(pi_state);
drop_count++;
task_count++; task_count++;
ret = get_futex_value_locked(&curval2, uaddr2); ret = get_futex_value_locked(&curval2, uaddr2);
if (!ret) if (!ret)
@ -1304,6 +1305,7 @@ retry_private:
if (ret == 1) { if (ret == 1) {
/* We got the lock. */ /* We got the lock. */
requeue_pi_wake_futex(this, &key2, hb2); requeue_pi_wake_futex(this, &key2, hb2);
drop_count++;
continue; continue;
} else if (ret) { } else if (ret) {
/* -EDEADLK */ /* -EDEADLK */
@ -1791,6 +1793,7 @@ static int futex_wait(u32 __user *uaddr, int fshared,
current->timer_slack_ns); current->timer_slack_ns);
} }
retry:
/* Prepare to wait on uaddr. */ /* Prepare to wait on uaddr. */
ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
if (ret) if (ret)
@ -1808,9 +1811,14 @@ static int futex_wait(u32 __user *uaddr, int fshared,
goto out_put_key; goto out_put_key;
/* /*
* We expect signal_pending(current), but another thread may * We expect signal_pending(current), but we might be the
* have handled it for us already. * victim of a spurious wakeup as well.
*/ */
if (!signal_pending(current)) {
put_futex_key(fshared, &q.key);
goto retry;
}
ret = -ERESTARTSYS; ret = -ERESTARTSYS;
if (!abs_time) if (!abs_time)
goto out_put_key; goto out_put_key;
@ -2118,9 +2126,11 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
*/ */
plist_del(&q->list, &q->list.plist); plist_del(&q->list, &q->list.plist);
/* Handle spurious wakeups gracefully */
ret = -EAGAIN;
if (timeout && !timeout->task) if (timeout && !timeout->task)
ret = -ETIMEDOUT; ret = -ETIMEDOUT;
else else if (signal_pending(current))
ret = -ERESTARTNOINTR; ret = -ERESTARTNOINTR;
} }
return ret; return ret;
@ -2198,6 +2208,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
debug_rt_mutex_init_waiter(&rt_waiter); debug_rt_mutex_init_waiter(&rt_waiter);
rt_waiter.task = NULL; rt_waiter.task = NULL;
retry:
key2 = FUTEX_KEY_INIT; key2 = FUTEX_KEY_INIT;
ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE); ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE);
if (unlikely(ret != 0)) if (unlikely(ret != 0))
@ -2292,6 +2303,9 @@ out_put_keys:
out_key2: out_key2:
put_futex_key(fshared, &key2); put_futex_key(fshared, &key2);
/* Spurious wakeup ? */
if (ret == -EAGAIN)
goto retry;
out: out:
if (to) { if (to) {
hrtimer_cancel(&to->timer); hrtimer_cancel(&to->timer);

View File

@ -913,7 +913,20 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
spin_unlock(&rnp->lock); /* irqs remain disabled. */ spin_unlock(&rnp->lock); /* irqs remain disabled. */
break; break;
} }
rcu_preempt_offline_tasks(rsp, rnp, rdp);
/*
* If there was a task blocking the current grace period,
* and if all CPUs have checked in, we need to propagate
* the quiescent state up the rcu_node hierarchy. But that
* is inconvenient at the moment due to deadlock issues if
* this should end the current grace period. So set the
* offlined CPU's bit in ->qsmask in order to force the
* next force_quiescent_state() invocation to clean up this
* mess in a deadlock-free manner.
*/
if (rcu_preempt_offline_tasks(rsp, rnp, rdp) && !rnp->qsmask)
rnp->qsmask |= mask;
mask = rnp->grpmask; mask = rnp->grpmask;
spin_unlock(&rnp->lock); /* irqs remain disabled. */ spin_unlock(&rnp->lock); /* irqs remain disabled. */
rnp = rnp->parent; rnp = rnp->parent;
@ -958,7 +971,7 @@ static void rcu_offline_cpu(int cpu)
* Invoke any RCU callbacks that have made it to the end of their grace * Invoke any RCU callbacks that have made it to the end of their grace
* period. Throttle as specified by rdp->blimit. * period. Throttle as specified by rdp->blimit.
*/ */
static void rcu_do_batch(struct rcu_data *rdp) static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
{ {
unsigned long flags; unsigned long flags;
struct rcu_head *next, *list, **tail; struct rcu_head *next, *list, **tail;
@ -1011,6 +1024,13 @@ static void rcu_do_batch(struct rcu_data *rdp)
if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark) if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark)
rdp->blimit = blimit; rdp->blimit = blimit;
/* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */
if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) {
rdp->qlen_last_fqs_check = 0;
rdp->n_force_qs_snap = rsp->n_force_qs;
} else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark)
rdp->qlen_last_fqs_check = rdp->qlen;
local_irq_restore(flags); local_irq_restore(flags);
/* Re-raise the RCU softirq if there are callbacks remaining. */ /* Re-raise the RCU softirq if there are callbacks remaining. */
@ -1224,7 +1244,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
} }
/* If there are callbacks ready, invoke them. */ /* If there are callbacks ready, invoke them. */
rcu_do_batch(rdp); rcu_do_batch(rsp, rdp);
} }
/* /*
@ -1288,10 +1308,20 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */ rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */
} }
/* Force the grace period if too many callbacks or too long waiting. */ /*
if (unlikely(++rdp->qlen > qhimark)) { * Force the grace period if too many callbacks or too long waiting.
* Enforce hysteresis, and don't invoke force_quiescent_state()
* if some other CPU has recently done so. Also, don't bother
* invoking force_quiescent_state() if the newly enqueued callback
* is the only one waiting for a grace period to complete.
*/
if (unlikely(++rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
rdp->blimit = LONG_MAX; rdp->blimit = LONG_MAX;
force_quiescent_state(rsp, 0); if (rsp->n_force_qs == rdp->n_force_qs_snap &&
*rdp->nxttail[RCU_DONE_TAIL] != head)
force_quiescent_state(rsp, 0);
rdp->n_force_qs_snap = rsp->n_force_qs;
rdp->qlen_last_fqs_check = rdp->qlen;
} else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0) } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)
force_quiescent_state(rsp, 1); force_quiescent_state(rsp, 1);
local_irq_restore(flags); local_irq_restore(flags);
@ -1523,6 +1553,8 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
rdp->beenonline = 1; /* We have now been online. */ rdp->beenonline = 1; /* We have now been online. */
rdp->preemptable = preemptable; rdp->preemptable = preemptable;
rdp->passed_quiesc_completed = lastcomp - 1; rdp->passed_quiesc_completed = lastcomp - 1;
rdp->qlen_last_fqs_check = 0;
rdp->n_force_qs_snap = rsp->n_force_qs;
rdp->blimit = blimit; rdp->blimit = blimit;
spin_unlock(&rnp->lock); /* irqs remain disabled. */ spin_unlock(&rnp->lock); /* irqs remain disabled. */

View File

@ -167,6 +167,10 @@ struct rcu_data {
struct rcu_head *nxtlist; struct rcu_head *nxtlist;
struct rcu_head **nxttail[RCU_NEXT_SIZE]; struct rcu_head **nxttail[RCU_NEXT_SIZE];
long qlen; /* # of queued callbacks */ long qlen; /* # of queued callbacks */
long qlen_last_fqs_check;
/* qlen at last check for QS forcing */
unsigned long n_force_qs_snap;
/* did other CPU force QS recently? */
long blimit; /* Upper limit on a processed batch */ long blimit; /* Upper limit on a processed batch */
#ifdef CONFIG_NO_HZ #ifdef CONFIG_NO_HZ
@ -302,9 +306,9 @@ static void rcu_print_task_stall(struct rcu_node *rnp);
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
#ifdef CONFIG_HOTPLUG_CPU #ifdef CONFIG_HOTPLUG_CPU
static void rcu_preempt_offline_tasks(struct rcu_state *rsp, static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
struct rcu_node *rnp, struct rcu_node *rnp,
struct rcu_data *rdp); struct rcu_data *rdp);
static void rcu_preempt_offline_cpu(int cpu); static void rcu_preempt_offline_cpu(int cpu);
#endif /* #ifdef CONFIG_HOTPLUG_CPU */ #endif /* #ifdef CONFIG_HOTPLUG_CPU */
static void rcu_preempt_check_callbacks(int cpu); static void rcu_preempt_check_callbacks(int cpu);

View File

@ -304,21 +304,25 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
* parent is to remove the need for rcu_read_unlock_special() to * parent is to remove the need for rcu_read_unlock_special() to
* make more than two attempts to acquire the target rcu_node's lock. * make more than two attempts to acquire the target rcu_node's lock.
* *
* Returns 1 if there was previously a task blocking the current grace
* period on the specified rcu_node structure.
*
* The caller must hold rnp->lock with irqs disabled. * The caller must hold rnp->lock with irqs disabled.
*/ */
static void rcu_preempt_offline_tasks(struct rcu_state *rsp, static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
struct rcu_node *rnp, struct rcu_node *rnp,
struct rcu_data *rdp) struct rcu_data *rdp)
{ {
int i; int i;
struct list_head *lp; struct list_head *lp;
struct list_head *lp_root; struct list_head *lp_root;
int retval = rcu_preempted_readers(rnp);
struct rcu_node *rnp_root = rcu_get_root(rsp); struct rcu_node *rnp_root = rcu_get_root(rsp);
struct task_struct *tp; struct task_struct *tp;
if (rnp == rnp_root) { if (rnp == rnp_root) {
WARN_ONCE(1, "Last CPU thought to be offlined?"); WARN_ONCE(1, "Last CPU thought to be offlined?");
return; /* Shouldn't happen: at least one CPU online. */ return 0; /* Shouldn't happen: at least one CPU online. */
} }
WARN_ON_ONCE(rnp != rdp->mynode && WARN_ON_ONCE(rnp != rdp->mynode &&
(!list_empty(&rnp->blocked_tasks[0]) || (!list_empty(&rnp->blocked_tasks[0]) ||
@ -342,6 +346,8 @@ static void rcu_preempt_offline_tasks(struct rcu_state *rsp,
spin_unlock(&rnp_root->lock); /* irqs remain disabled */ spin_unlock(&rnp_root->lock); /* irqs remain disabled */
} }
} }
return retval;
} }
/* /*
@ -392,6 +398,17 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
} }
EXPORT_SYMBOL_GPL(call_rcu); EXPORT_SYMBOL_GPL(call_rcu);
/*
* Wait for an rcu-preempt grace period. Callers ask for an expedited
* grace period, but this is the crude, slow compatibility hack: it does
* not expedite anything and simply invokes synchronize_rcu().
*
* Takes no arguments and returns nothing. Exported to GPL-licensed
* modules via EXPORT_SYMBOL_GPL().
*/
void synchronize_rcu_expedited(void)
{
synchronize_rcu();
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
/* /*
* Check to see if there is any immediate preemptable-RCU-related work * Check to see if there is any immediate preemptable-RCU-related work
* to be done. * to be done.
@ -521,12 +538,15 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
/* /*
* Because preemptable RCU does not exist, it never needs to migrate * Because preemptable RCU does not exist, it never needs to migrate
* tasks that were blocked within RCU read-side critical sections. * tasks that were blocked within RCU read-side critical sections, and
* such non-existent tasks cannot possibly have been blocking the current
* grace period.
*/ */
static void rcu_preempt_offline_tasks(struct rcu_state *rsp, static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
struct rcu_node *rnp, struct rcu_node *rnp,
struct rcu_data *rdp) struct rcu_data *rdp)
{ {
return 0;
} }
/* /*
@ -564,6 +584,16 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
} }
EXPORT_SYMBOL_GPL(call_rcu); EXPORT_SYMBOL_GPL(call_rcu);
/*
* Wait for an rcu-preempt grace period, but make it happen quickly.
* Because preemptable RCU does not exist in this configuration, the
* request is mapped to rcu-sched by calling synchronize_sched_expedited().
*
* Takes no arguments and returns nothing. Exported to GPL-licensed
* modules via EXPORT_SYMBOL_GPL().
*/
void synchronize_rcu_expedited(void)
{
synchronize_sched_expedited();
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
/* /*
* Because preemptable RCU does not exist, it never has any work to do. * Because preemptable RCU does not exist, it never has any work to do.
*/ */