rcu: Consolidate tree setup for synchronize_rcu_expedited()
This commit replaces sync_rcu_preempt_exp_init1() and sync_rcu_preempt_exp_init2() with sync_exp_reset_tree_hotplug() and sync_exp_reset_tree(), which will also be used by synchronize_sched_expedited(), and sync_rcu_exp_select_nodes(), which contains code specific to synchronize_rcu_expedited().

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
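For orientation, here is a minimal user-space sketch of the division of labor described above. The toy_* names, stub bodies, printouts, and main() are invented for this sketch and are not kernel code; only the call order and the shared-versus-flavor-specific split come from the diff below: the two reset helpers are flavor-neutral (hence reusable later by synchronize_sched_expedited()), while node selection stays specific to preemptible RCU.

/* Toy model only: stand-alone C, not the kernel implementation. */
#include <stdio.h>

static void toy_reset_tree_hotplug(void)	/* shared by both flavors */
{
	printf("fold newly onlined CPUs into ->expmaskinit\n");
}

static void toy_reset_tree(void)		/* shared by both flavors */
{
	toy_reset_tree_hotplug();
	printf("copy ->expmaskinit into ->expmask for the new expedited GP\n");
}

static void toy_select_nodes(void)		/* preemptible-RCU specific */
{
	toy_reset_tree();
	printf("snapshot blocked readers on each leaf rcu_node\n");
}

int main(void)
{
	toy_select_nodes();	/* what synchronize_rcu_expedited() now drives */
	return 0;
}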
@@ -3379,6 +3379,87 @@ static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
 	return rcu_seq_done(&rsp->expedited_sequence, s);
 }
 
+/*
+ * Reset the ->expmaskinit values in the rcu_node tree to reflect any
+ * recent CPU-online activity.  Note that these masks are not cleared
+ * when CPUs go offline, so they reflect the union of all CPUs that have
+ * ever been online.  This means that this function normally takes its
+ * no-work-to-do fastpath.
+ */
+static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
+{
+	bool done;
+	unsigned long flags;
+	unsigned long mask;
+	unsigned long oldmask;
+	int ncpus = READ_ONCE(rsp->ncpus);
+	struct rcu_node *rnp;
+	struct rcu_node *rnp_up;
+
+	/* If no new CPUs onlined since last time, nothing to do. */
+	if (likely(ncpus == rsp->ncpus_snap))
+		return;
+	rsp->ncpus_snap = ncpus;
+
+	/*
+	 * Each pass through the following loop propagates newly onlined
+	 * CPUs for the current rcu_node structure up the rcu_node tree.
+	 */
+	rcu_for_each_leaf_node(rsp, rnp) {
+		raw_spin_lock_irqsave(&rnp->lock, flags);
+		smp_mb__after_unlock_lock();
+		if (rnp->expmaskinit == rnp->expmaskinitnext) {
+			raw_spin_unlock_irqrestore(&rnp->lock, flags);
+			continue;  /* No new CPUs, nothing to do. */
+		}
+
+		/* Update this node's mask, track old value for propagation. */
+		oldmask = rnp->expmaskinit;
+		rnp->expmaskinit = rnp->expmaskinitnext;
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+
+		/* If was already nonzero, nothing to propagate. */
+		if (oldmask)
+			continue;
+
+		/* Propagate the new CPU up the tree. */
+		mask = rnp->grpmask;
+		rnp_up = rnp->parent;
+		done = false;
+		while (rnp_up) {
+			raw_spin_lock_irqsave(&rnp_up->lock, flags);
+			smp_mb__after_unlock_lock();
+			if (rnp_up->expmaskinit)
+				done = true;
+			rnp_up->expmaskinit |= mask;
+			raw_spin_unlock_irqrestore(&rnp_up->lock, flags);
+			if (done)
+				break;
+			mask = rnp_up->grpmask;
+			rnp_up = rnp_up->parent;
+		}
+	}
+}
+
+/*
+ * Reset the ->expmask values in the rcu_node tree in preparation for
+ * a new expedited grace period.
+ */
+static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
+{
+	unsigned long flags;
+	struct rcu_node *rnp;
+
+	sync_exp_reset_tree_hotplug(rsp);
+	rcu_for_each_node_breadth_first(rsp, rnp) {
+		raw_spin_lock_irqsave(&rnp->lock, flags);
+		smp_mb__after_unlock_lock();
+		WARN_ON_ONCE(rnp->expmask);
+		rnp->expmask = rnp->expmaskinit;
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	}
+}
+
 /*
  * Return non-zero if there are any tasks in RCU read-side critical
  * sections blocking the current preemptible-RCU expedited grace period.
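A hedged user-space model of the two helpers added above: the three-node layout, bit values, and printf() are assumptions made for this sketch, but the ncpus/ncpus_snap fastpath, the stop-early upward propagation of ->expmaskinit, and the final copy into ->expmask follow the diff.

#include <stdio.h>

struct toy_node {
	struct toy_node *parent;
	unsigned long grpmask;		/* this node's bit in its parent's masks */
	unsigned long expmaskinit;	/* union of CPUs ever seen online here   */
	unsigned long expmaskinitnext;	/* CPUs online for the next expedited GP */
	unsigned long expmask;		/* armed for the current expedited GP    */
};

static struct toy_node root;
static struct toy_node leaf[2] = {
	{ .parent = &root, .grpmask = 0x1 },
	{ .parent = &root, .grpmask = 0x2 },
};
static int ncpus, ncpus_snap;

/* Modeled on sync_exp_reset_tree_hotplug(): fastpath plus upward propagation. */
static void toy_reset_tree_hotplug(void)
{
	int i;

	if (ncpus == ncpus_snap)
		return;			/* no new CPUs since last time */
	ncpus_snap = ncpus;

	for (i = 0; i < 2; i++) {
		struct toy_node *rnp = &leaf[i], *up;
		unsigned long mask, oldmask = rnp->expmaskinit;

		if (rnp->expmaskinit == rnp->expmaskinitnext)
			continue;	/* nothing new on this leaf */
		rnp->expmaskinit = rnp->expmaskinitnext;
		if (oldmask)
			continue;	/* ancestors already know about this leaf */

		/* First CPU here: propagate up, stopping once an ancestor
		 * already had some bit set before this pass. */
		mask = rnp->grpmask;
		for (up = rnp->parent; up; up = up->parent) {
			int done = up->expmaskinit != 0;

			up->expmaskinit |= mask;
			if (done)
				break;
			mask = up->grpmask;
		}
	}
}

/* Modeled on sync_exp_reset_tree(): arm ->expmask for a new expedited GP. */
static void toy_reset_tree(void)
{
	int i;

	toy_reset_tree_hotplug();
	root.expmask = root.expmaskinit;
	for (i = 0; i < 2; i++)
		leaf[i].expmask = leaf[i].expmaskinit;
}

int main(void)
{
	leaf[0].expmaskinitnext = 0x1;	/* pretend CPU 0 just came online */
	ncpus = 1;
	toy_reset_tree();
	printf("root %#lx, leaf0 %#lx, leaf1 %#lx\n",
	       root.expmask, leaf[0].expmask, leaf[1].expmask);
	return 0;
}

The stop-early test mirrors the done flag in the kernel loop: once a parent already has any bit set, the rest of the path to the root was populated by an earlier pass.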
@@ -3971,7 +4052,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 
 	/* Set up local state, ensuring consistent view of global state. */
 	raw_spin_lock_irqsave(&rnp->lock, flags);
-	rdp->beenonline = 1;	 /* We have now been online. */
 	rdp->qlen_last_fqs_check = 0;
 	rdp->n_force_qs_snap = rsp->n_force_qs;
 	rdp->blimit = blimit;
@@ -3993,6 +4073,10 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 	raw_spin_lock(&rnp->lock);		/* irqs already disabled. */
 	smp_mb__after_unlock_lock();
 	rnp->qsmaskinitnext |= mask;
+	rnp->expmaskinitnext |= mask;
+	if (!rdp->beenonline)
+		WRITE_ONCE(rsp->ncpus, READ_ONCE(rsp->ncpus) + 1);
+	rdp->beenonline = true;	 /* We have now been online. */
 	rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */
 	rdp->completed = rnp->completed;
 	rdp->passed_quiesce = false;
@@ -171,16 +171,21 @@ struct rcu_node {
 						/*  an rcu_data structure, otherwise, each */
 						/*  bit corresponds to a child rcu_node */
 						/*  structure. */
-	unsigned long expmask;	/* Groups that have ->blkd_tasks */
-				/*  elements that need to drain to allow the */
-				/*  current expedited grace period to */
-				/*  complete (only for PREEMPT_RCU). */
 	unsigned long qsmaskinit;
-				/* Per-GP initial value for qsmask & expmask. */
+				/* Per-GP initial value for qsmask. */
 				/*  Initialized from ->qsmaskinitnext at the */
 				/*  beginning of each grace period. */
 	unsigned long qsmaskinitnext;
 				/* Online CPUs for next grace period. */
+	unsigned long expmask;	/* CPUs or groups that need to check in */
+				/*  to allow the current expedited GP */
+				/*  to complete. */
+	unsigned long expmaskinit;
+				/* Per-GP initial values for expmask. */
+				/*  Initialized from ->expmaskinitnext at the */
+				/*  beginning of each expedited GP. */
+	unsigned long expmaskinitnext;
+				/* Online CPUs for next expedited GP. */
 	unsigned long grpmask;	/* Mask to apply to parent qsmask. */
 				/*  Only one bit will be set in this mask. */
 	int	grplo;		/* lowest-numbered CPU or group here. */
@@ -466,6 +471,7 @@ struct rcu_state {
 	struct rcu_data __percpu *rda;		/* pointer of percu rcu_data. */
 	void (*call)(struct rcu_head *head,	/* call_rcu() flavor. */
 		     void (*func)(struct rcu_head *head));
+	int ncpus;				/* # CPUs seen so far. */
 
 	/* The following fields are guarded by the root rcu_node's lock. */
 
@@ -508,6 +514,7 @@ struct rcu_state {
 	atomic_long_t expedited_normal;		/* # fallbacks to normal. */
 	atomic_t expedited_need_qs;		/* # CPUs left to check in. */
 	wait_queue_head_t expedited_wq;		/* Wait for check-ins. */
+	int ncpus_snap;				/* # CPUs seen last time. */
 
 	unsigned long jiffies_force_qs;		/* Time at which to invoke */
 						/*  force_quiescent_state(). */
@@ -536,87 +536,29 @@ void synchronize_rcu(void)
 EXPORT_SYMBOL_GPL(synchronize_rcu);
 
 /*
- * Snapshot the tasks blocking the newly started preemptible-RCU expedited
- * grace period for the specified rcu_node structure, phase 1.  If there
- * are such tasks, set the ->expmask bits up the rcu_node tree and also
- * set the ->expmask bits on the leaf rcu_node structures to tell phase 2
- * that work is needed here.
- *
- * Caller must hold the root rcu_node's exp_funnel_mutex.
+ * Select the nodes that the upcoming expedited grace period needs
+ * to wait for.
  */
-static void
-sync_rcu_preempt_exp_init1(struct rcu_state *rsp, struct rcu_node *rnp)
+static void sync_rcu_exp_select_nodes(struct rcu_state *rsp)
 {
 	unsigned long flags;
-	unsigned long mask;
-	struct rcu_node *rnp_up;
+	struct rcu_node *rnp;
 
+	sync_exp_reset_tree(rsp);
+	rcu_for_each_leaf_node(rsp, rnp) {
 	raw_spin_lock_irqsave(&rnp->lock, flags);
 	smp_mb__after_unlock_lock();
-	WARN_ON_ONCE(rnp->expmask);
-	WARN_ON_ONCE(rnp->exp_tasks);
+	rnp->expmask = 0; /* No per-CPU component yet. */
 	if (!rcu_preempt_has_tasks(rnp)) {
-		/* No blocked tasks, nothing to do. */
+		/* FIXME: Want __rcu_report_exp_rnp() here. */
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
-		return;
-	}
-	/* Call for Phase 2 and propagate ->expmask bits up the tree. */
-	rnp->expmask = 1;
-	rnp_up = rnp;
-	while (rnp_up->parent) {
-		mask = rnp_up->grpmask;
-		rnp_up = rnp_up->parent;
-		if (rnp_up->expmask & mask)
-			break;
-		raw_spin_lock(&rnp_up->lock); /* irqs already off */
-		smp_mb__after_unlock_lock();
-		rnp_up->expmask |= mask;
-		raw_spin_unlock(&rnp_up->lock); /* irqs still off */
-	}
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
-}
-
-/*
- * Snapshot the tasks blocking the newly started preemptible-RCU expedited
- * grace period for the specified rcu_node structure, phase 2.  If the
- * leaf rcu_node structure has its ->expmask field set, check for tasks.
- * If there are some, clear ->expmask and set ->exp_tasks accordingly,
- * then initiate RCU priority boosting.  Otherwise, clear ->expmask and
- * invoke rcu_report_exp_rnp() to clear out the upper-level ->expmask bits,
- * enabling rcu_read_unlock_special() to do the bit-clearing.
- *
- * Caller must hold the root rcu_node's exp_funnel_mutex.
- */
-static void
-sync_rcu_preempt_exp_init2(struct rcu_state *rsp, struct rcu_node *rnp)
-{
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&rnp->lock, flags);
-	smp_mb__after_unlock_lock();
-	if (!rnp->expmask) {
-		/* Phase 1 didn't do anything, so Phase 2 doesn't either. */
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
-		return;
-	}
-
-	/* Phase 1 is over. */
-	rnp->expmask = 0;
-
-	/*
-	 * If there are still blocked tasks, set up ->exp_tasks so that
-	 * rcu_read_unlock_special() will wake us and then boost them.
-	 */
-	if (rcu_preempt_has_tasks(rnp)) {
+	} else {
 		rnp->exp_tasks = rnp->blkd_tasks.next;
-		rcu_initiate_boost(rnp, flags);  /* releases rnp->lock */
-		return;
+		rcu_initiate_boost(rnp, flags);
 	}
-
-	/* No longer any blocked tasks, so undo bit setting. */
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	rcu_report_exp_rnp(rsp, rnp, false);
 }
+}
 
 /**
  * synchronize_rcu_expedited - Brute-force RCU grace period
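Below is a rough, user-space stand-in for the single-pass selection that replaces the old two-phase init. The leaf names, task counts, and printouts are invented for this sketch, and the locking, priority boosting, and quiescent-state reporting are deliberately left out; the shape is the point: each leaf is visited once and either records where its blocked readers start or is treated as having nothing for the expedited grace period to wait on.

#include <stdio.h>

struct toy_leaf {
	const char *name;
	int blocked_readers;	/* stand-in for a non-empty ->blkd_tasks */
	int exp_tasks_set;	/* stand-in for ->exp_tasks              */
};

/* Single pass over the leaves, loosely modeled on sync_rcu_exp_select_nodes(). */
static void toy_select_nodes(struct toy_leaf *leaf, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		if (!leaf[i].blocked_readers) {
			/* Nothing blocks the expedited GP here. */
			printf("%s: nothing to wait for\n", leaf[i].name);
		} else {
			/* Remember where the blocked readers start. */
			leaf[i].exp_tasks_set = 1;
			printf("%s: will wait for blocked readers\n",
			       leaf[i].name);
		}
	}
}

int main(void)
{
	struct toy_leaf leaf[2] = {
		{ .name = "leaf0", .blocked_readers = 1 },
		{ .name = "leaf1", .blocked_readers = 0 },
	};

	toy_select_nodes(leaf, 2);
	return 0;
}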
@@ -648,16 +590,8 @@ void synchronize_rcu_expedited(void)
 	/* force all RCU readers onto ->blkd_tasks lists. */
 	synchronize_sched_expedited();
 
-	/*
-	 * Snapshot current state of ->blkd_tasks lists into ->expmask.
-	 * Phase 1 sets bits and phase 2 permits rcu_read_unlock_special()
-	 * to start clearing them.  Doing this in one phase leads to
-	 * strange races between setting and clearing bits, so just say "no"!
-	 */
-	rcu_for_each_leaf_node(rsp, rnp)
-		sync_rcu_preempt_exp_init1(rsp, rnp);
-	rcu_for_each_leaf_node(rsp, rnp)
-		sync_rcu_preempt_exp_init2(rsp, rnp);
+	/* Initialize the rcu_node tree in preparation for the wait. */
+	sync_rcu_exp_select_nodes(rsp);
 
 	/* Wait for snapshotted ->blkd_tasks lists to drain. */
 	rnp = rcu_get_root(rsp);