Merge tag 'rcu.2022.01.09a' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu

Pull RCU updates from Paul McKenney:

 - Documentation updates, perhaps most notably Neil Brown's writeup of
   the reference-counting analogy to RCU.

 - Expedited grace-period cleanups.

 - Remove CONFIG_RCU_FAST_NO_HZ due to lack of valid users. I have asked
   around, posted a blog entry, and sent this series to LKML without
   result.

 - Miscellaneous fixes.

 - RCU callback offloading updates, perhaps most notably Frederic
   Weisbecker's updates allowing CPUs booted in the de-offloaded state
   to be offloaded at runtime.

 - nolibc fixes from Willy Tarreau and Anmar Faizi, but also including
   Mark Brown's addition of gettid().

 - RCU Tasks Trace fixes, including changes that increase the
   scalability of call_rcu_tasks_trace() for the BPF folks (Martin Lau
   and KP Singh).

 - Various fixes including those from Wander Lairson Costa and Li
   Zhijian.

 - Fixes plus addition of tests for the increased call_rcu_tasks_trace()
   scalability.

* tag 'rcu.2022.01.09a' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu: (87 commits)
  rcu/nocb: Merge rcu_spawn_cpu_nocb_kthread() and rcu_spawn_one_nocb_kthread()
  rcu/nocb: Allow empty "rcu_nocbs" kernel parameter
  rcu/nocb: Create kthreads on all CPUs if "rcu_nocbs=" or "nohz_full=" are passed
  rcu/nocb: Optimize kthreads and rdp initialization
  rcu/nocb: Prepare nocb_cb_wait() to start with a non-offloaded rdp
  rcu/nocb: Remove rcu_node structure from nocb list when de-offloaded
  rcu-tasks: Use fewer callbacks queues if callback flood ends
  rcu-tasks: Use separate ->percpu_dequeue_lim for callback dequeueing
  rcu-tasks: Use more callback queues if contention encountered
  rcu-tasks: Avoid raw-spinlocked wakeups from call_rcu_tasks_generic()
  rcu-tasks: Count trylocks to estimate call_rcu_tasks() contention
  rcu-tasks: Add rcupdate.rcu_task_enqueue_lim to set initial queueing
  rcu-tasks: Make rcu_barrier_tasks*() handle multiple callback queues
  rcu-tasks: Use workqueues for multiple rcu_tasks_invoke_cbs() invocations
  rcu-tasks: Abstract invocations of callbacks
  rcu-tasks: Abstract checking of callback lists
  rcu-tasks: Add a ->percpu_enqueue_lim to the rcu_tasks structure
  rcu-tasks: Inspect stalled task's trc state in locked state
  rcu-tasks: Use spin_lock_rcu_node() and friends
  rcutorture: Combine n_max_cbs from all kthreads in a callback flood
  ...
This commit is contained in:
Linus Torvalds
2022-01-11 09:29:44 -08:00
68 changed files with 1243 additions and 794 deletions

View File

@@ -1047,7 +1047,7 @@ static int __init lock_torture_init(void)
sizeof(writer_tasks[0]),
GFP_KERNEL);
if (writer_tasks == NULL) {
VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory");
TOROUT_ERRSTRING("writer_tasks: Out of memory");
firsterr = -ENOMEM;
goto unwind;
}
@@ -1058,7 +1058,7 @@ static int __init lock_torture_init(void)
sizeof(reader_tasks[0]),
GFP_KERNEL);
if (reader_tasks == NULL) {
VERBOSE_TOROUT_ERRSTRING("reader_tasks: Out of memory");
TOROUT_ERRSTRING("reader_tasks: Out of memory");
kfree(writer_tasks);
writer_tasks = NULL;
firsterr = -ENOMEM;

View File

@@ -112,7 +112,7 @@ config RCU_STALL_COMMON
making these warnings mandatory for the tree variants.
config RCU_NEED_SEGCBLIST
def_bool ( TREE_RCU || TREE_SRCU )
def_bool ( TREE_RCU || TREE_SRCU || TASKS_RCU_GENERIC )
config RCU_FANOUT
int "Tree-based hierarchical RCU fanout value"
@@ -169,24 +169,6 @@ config RCU_FANOUT_LEAF
Take the default if unsure.
config RCU_FAST_NO_HZ
bool "Accelerate last non-dyntick-idle CPU's grace periods"
depends on NO_HZ_COMMON && SMP && RCU_EXPERT
default n
help
This option permits CPUs to enter dynticks-idle state even if
they have RCU callbacks queued, and prevents RCU from waking
these CPUs up more than roughly once every four jiffies (by
default, you can adjust this using the rcutree.rcu_idle_gp_delay
parameter), thus improving energy efficiency. On the other
hand, this option increases the duration of RCU grace periods,
for example, slowing down synchronize_rcu().
Say Y if energy efficiency is critically important, and you
don't care about increased grace-period durations.
Say N if you are unsure.
config RCU_BOOST
bool "Enable RCU priority boosting"
depends on (RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT) || PREEMPT_RT

View File

@@ -261,16 +261,14 @@ void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
}
/*
* Mark the specified rcu_segcblist structure as offloaded.
* Mark the specified rcu_segcblist structure as offloaded (or not)
*/
void rcu_segcblist_offload(struct rcu_segcblist *rsclp, bool offload)
{
if (offload) {
rcu_segcblist_clear_flags(rsclp, SEGCBLIST_SOFTIRQ_ONLY);
rcu_segcblist_set_flags(rsclp, SEGCBLIST_OFFLOADED);
} else {
if (offload)
rcu_segcblist_set_flags(rsclp, SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED);
else
rcu_segcblist_clear_flags(rsclp, SEGCBLIST_OFFLOADED);
}
}
/*

View File

@@ -80,11 +80,14 @@ static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp)
return rcu_segcblist_test_flags(rsclp, SEGCBLIST_ENABLED);
}
/* Is the specified rcu_segcblist offloaded, or is SEGCBLIST_SOFTIRQ_ONLY set? */
/*
* Is the specified rcu_segcblist NOCB offloaded (or in the middle of the
* [de]offloading process)?
*/
static inline bool rcu_segcblist_is_offloaded(struct rcu_segcblist *rsclp)
{
if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
!rcu_segcblist_test_flags(rsclp, SEGCBLIST_SOFTIRQ_ONLY))
rcu_segcblist_test_flags(rsclp, SEGCBLIST_LOCKING))
return true;
return false;
@@ -92,9 +95,8 @@ static inline bool rcu_segcblist_is_offloaded(struct rcu_segcblist *rsclp)
static inline bool rcu_segcblist_completely_offloaded(struct rcu_segcblist *rsclp)
{
int flags = SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP | SEGCBLIST_OFFLOADED;
if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) && (rsclp->flags & flags) == flags)
if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
!rcu_segcblist_test_flags(rsclp, SEGCBLIST_RCU_CORE))
return true;
return false;

View File

@@ -50,8 +50,8 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>");
pr_alert("%s" SCALE_FLAG " %s\n", scale_type, s)
#define VERBOSE_SCALEOUT_STRING(s) \
do { if (verbose) pr_alert("%s" SCALE_FLAG " %s\n", scale_type, s); } while (0)
#define VERBOSE_SCALEOUT_ERRSTRING(s) \
do { if (verbose) pr_alert("%s" SCALE_FLAG "!!! %s\n", scale_type, s); } while (0)
#define SCALEOUT_ERRSTRING(s) \
pr_alert("%s" SCALE_FLAG "!!! %s\n", scale_type, s)
/*
* The intended use cases for the nreaders and nwriters module parameters
@@ -514,11 +514,11 @@ rcu_scale_cleanup(void)
* during the mid-boot phase, so have to wait till the end.
*/
if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp)
VERBOSE_SCALEOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!");
SCALEOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!");
if (rcu_gp_is_normal() && gp_exp)
VERBOSE_SCALEOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!");
SCALEOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!");
if (gp_exp && gp_async)
VERBOSE_SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!");
SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!");
if (torture_cleanup_begin())
return;
@@ -845,7 +845,7 @@ rcu_scale_init(void)
reader_tasks = kcalloc(nrealreaders, sizeof(reader_tasks[0]),
GFP_KERNEL);
if (reader_tasks == NULL) {
VERBOSE_SCALEOUT_ERRSTRING("out of memory");
SCALEOUT_ERRSTRING("out of memory");
firsterr = -ENOMEM;
goto unwind;
}
@@ -865,7 +865,7 @@ rcu_scale_init(void)
kcalloc(nrealwriters, sizeof(*writer_n_durations),
GFP_KERNEL);
if (!writer_tasks || !writer_durations || !writer_n_durations) {
VERBOSE_SCALEOUT_ERRSTRING("out of memory");
SCALEOUT_ERRSTRING("out of memory");
firsterr = -ENOMEM;
goto unwind;
}

View File

@@ -46,6 +46,7 @@
#include <linux/oom.h>
#include <linux/tick.h>
#include <linux/rcupdate_trace.h>
#include <linux/nmi.h>
#include "rcu.h"
@@ -53,15 +54,18 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com> and Josh Triplett <josh@joshtriplett.org>");
/* Bits for ->extendables field, extendables param, and related definitions. */
#define RCUTORTURE_RDR_SHIFT 8 /* Put SRCU index in upper bits. */
#define RCUTORTURE_RDR_MASK ((1 << RCUTORTURE_RDR_SHIFT) - 1)
#define RCUTORTURE_RDR_SHIFT_1 8 /* Put SRCU index in upper bits. */
#define RCUTORTURE_RDR_MASK_1 (1 << RCUTORTURE_RDR_SHIFT_1)
#define RCUTORTURE_RDR_SHIFT_2 9 /* Put SRCU index in upper bits. */
#define RCUTORTURE_RDR_MASK_2 (1 << RCUTORTURE_RDR_SHIFT_2)
#define RCUTORTURE_RDR_BH 0x01 /* Extend readers by disabling bh. */
#define RCUTORTURE_RDR_IRQ 0x02 /* ... disabling interrupts. */
#define RCUTORTURE_RDR_PREEMPT 0x04 /* ... disabling preemption. */
#define RCUTORTURE_RDR_RBH 0x08 /* ... rcu_read_lock_bh(). */
#define RCUTORTURE_RDR_SCHED 0x10 /* ... rcu_read_lock_sched(). */
#define RCUTORTURE_RDR_RCU 0x20 /* ... entering another RCU reader. */
#define RCUTORTURE_RDR_NBITS 6 /* Number of bits defined above. */
#define RCUTORTURE_RDR_RCU_1 0x20 /* ... entering another RCU reader. */
#define RCUTORTURE_RDR_RCU_2 0x40 /* ... entering another RCU reader. */
#define RCUTORTURE_RDR_NBITS 7 /* Number of bits defined above. */
#define RCUTORTURE_MAX_EXTEND \
(RCUTORTURE_RDR_BH | RCUTORTURE_RDR_IRQ | RCUTORTURE_RDR_PREEMPT | \
RCUTORTURE_RDR_RBH | RCUTORTURE_RDR_SCHED)
@@ -75,7 +79,7 @@ torture_param(int, fqs_duration, 0,
"Duration of fqs bursts (us), 0 to disable");
torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)");
torture_param(int, fqs_stutter, 3, "Wait time between fqs bursts (s)");
torture_param(bool, fwd_progress, 1, "Test grace-period forward progress");
torture_param(int, fwd_progress, 1, "Test grace-period forward progress");
torture_param(int, fwd_progress_div, 4, "Fraction of CPU stall to wait");
torture_param(int, fwd_progress_holdoff, 60,
"Time between forward-progress tests (s)");
@@ -109,6 +113,8 @@ torture_param(int, shutdown_secs, 0, "Shutdown time (s), <= zero to disable.");
torture_param(int, stall_cpu, 0, "Stall duration (s), zero to disable.");
torture_param(int, stall_cpu_holdoff, 10,
"Time to wait before starting stall (s).");
torture_param(bool, stall_no_softlockup, false,
"Avoid softlockup warning during cpu stall.");
torture_param(int, stall_cpu_irqsoff, 0, "Disable interrupts while stalling.");
torture_param(int, stall_cpu_block, 0, "Sleep while stalling.");
torture_param(int, stall_gp_kthread, 0,
@@ -140,7 +146,7 @@ static struct task_struct *stats_task;
static struct task_struct *fqs_task;
static struct task_struct *boost_tasks[NR_CPUS];
static struct task_struct *stall_task;
static struct task_struct *fwd_prog_task;
static struct task_struct **fwd_prog_tasks;
static struct task_struct **barrier_cbs_tasks;
static struct task_struct *barrier_task;
static struct task_struct *read_exit_task;
@@ -342,10 +348,12 @@ struct rcu_torture_ops {
void (*gp_kthread_dbg)(void);
bool (*check_boost_failed)(unsigned long gp_state, int *cpup);
int (*stall_dur)(void);
long cbflood_max;
int irq_capable;
int can_boost;
int extendables;
int slow_gps;
int no_pi_lock;
const char *name;
};
@@ -667,6 +675,7 @@ static struct rcu_torture_ops srcu_ops = {
.cb_barrier = srcu_torture_barrier,
.stats = srcu_torture_stats,
.irq_capable = 1,
.no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU),
.name = "srcu"
};
@@ -700,6 +709,7 @@ static struct rcu_torture_ops srcud_ops = {
.cb_barrier = srcu_torture_barrier,
.stats = srcu_torture_stats,
.irq_capable = 1,
.no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU),
.name = "srcud"
};
@@ -720,6 +730,7 @@ static struct rcu_torture_ops busted_srcud_ops = {
.cb_barrier = srcu_torture_barrier,
.stats = srcu_torture_stats,
.irq_capable = 1,
.no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU),
.extendables = RCUTORTURE_MAX_EXTEND,
.name = "busted_srcud"
};
@@ -831,6 +842,7 @@ static struct rcu_torture_ops tasks_rude_ops = {
.call = call_rcu_tasks_rude,
.cb_barrier = rcu_barrier_tasks_rude,
.gp_kthread_dbg = show_rcu_tasks_rude_gp_kthread,
.cbflood_max = 50000,
.fqs = NULL,
.stats = NULL,
.irq_capable = 1,
@@ -871,6 +883,7 @@ static struct rcu_torture_ops tasks_tracing_ops = {
.call = call_rcu_tasks_trace,
.cb_barrier = rcu_barrier_tasks_trace,
.gp_kthread_dbg = show_rcu_tasks_trace_gp_kthread,
.cbflood_max = 50000,
.fqs = NULL,
.stats = NULL,
.irq_capable = 1,
@@ -1420,13 +1433,15 @@ static void rcutorture_one_extend(int *readstate, int newstate,
struct rt_read_seg *rtrsp)
{
unsigned long flags;
int idxnew = -1;
int idxold = *readstate;
int idxnew1 = -1;
int idxnew2 = -1;
int idxold1 = *readstate;
int idxold2 = idxold1;
int statesnew = ~*readstate & newstate;
int statesold = *readstate & ~newstate;
WARN_ON_ONCE(idxold < 0);
WARN_ON_ONCE((idxold >> RCUTORTURE_RDR_SHIFT) > 1);
WARN_ON_ONCE(idxold2 < 0);
WARN_ON_ONCE((idxold2 >> RCUTORTURE_RDR_SHIFT_2) > 1);
rtrsp->rt_readstate = newstate;
/* First, put new protection in place to avoid critical-section gap. */
@@ -1440,8 +1455,10 @@ static void rcutorture_one_extend(int *readstate, int newstate,
preempt_disable();
if (statesnew & RCUTORTURE_RDR_SCHED)
rcu_read_lock_sched();
if (statesnew & RCUTORTURE_RDR_RCU)
idxnew = cur_ops->readlock() << RCUTORTURE_RDR_SHIFT;
if (statesnew & RCUTORTURE_RDR_RCU_1)
idxnew1 = (cur_ops->readlock() & 0x1) << RCUTORTURE_RDR_SHIFT_1;
if (statesnew & RCUTORTURE_RDR_RCU_2)
idxnew2 = (cur_ops->readlock() & 0x1) << RCUTORTURE_RDR_SHIFT_2;
/*
* Next, remove old protection, in decreasing order of strength
@@ -1460,12 +1477,20 @@ static void rcutorture_one_extend(int *readstate, int newstate,
local_bh_enable();
if (statesold & RCUTORTURE_RDR_RBH)
rcu_read_unlock_bh();
if (statesold & RCUTORTURE_RDR_RCU) {
bool lockit = !statesnew && !(torture_random(trsp) & 0xffff);
if (statesold & RCUTORTURE_RDR_RCU_2) {
cur_ops->readunlock((idxold2 >> RCUTORTURE_RDR_SHIFT_2) & 0x1);
WARN_ON_ONCE(idxnew2 != -1);
idxold2 = 0;
}
if (statesold & RCUTORTURE_RDR_RCU_1) {
bool lockit;
lockit = !cur_ops->no_pi_lock && !statesnew && !(torture_random(trsp) & 0xffff);
if (lockit)
raw_spin_lock_irqsave(&current->pi_lock, flags);
cur_ops->readunlock(idxold >> RCUTORTURE_RDR_SHIFT);
cur_ops->readunlock((idxold1 >> RCUTORTURE_RDR_SHIFT_1) & 0x1);
WARN_ON_ONCE(idxnew1 != -1);
idxold1 = 0;
if (lockit)
raw_spin_unlock_irqrestore(&current->pi_lock, flags);
}
@@ -1475,13 +1500,19 @@ static void rcutorture_one_extend(int *readstate, int newstate,
cur_ops->read_delay(trsp, rtrsp);
/* Update the reader state. */
if (idxnew == -1)
idxnew = idxold & ~RCUTORTURE_RDR_MASK;
WARN_ON_ONCE(idxnew < 0);
WARN_ON_ONCE((idxnew >> RCUTORTURE_RDR_SHIFT) > 1);
*readstate = idxnew | newstate;
WARN_ON_ONCE((*readstate >> RCUTORTURE_RDR_SHIFT) < 0);
WARN_ON_ONCE((*readstate >> RCUTORTURE_RDR_SHIFT) > 1);
if (idxnew1 == -1)
idxnew1 = idxold1 & RCUTORTURE_RDR_MASK_1;
WARN_ON_ONCE(idxnew1 < 0);
if (WARN_ON_ONCE((idxnew1 >> RCUTORTURE_RDR_SHIFT_1) > 1))
pr_info("Unexpected idxnew1 value of %#x\n", idxnew1);
if (idxnew2 == -1)
idxnew2 = idxold2 & RCUTORTURE_RDR_MASK_2;
WARN_ON_ONCE(idxnew2 < 0);
WARN_ON_ONCE((idxnew2 >> RCUTORTURE_RDR_SHIFT_2) > 1);
*readstate = idxnew1 | idxnew2 | newstate;
WARN_ON_ONCE(*readstate < 0);
if (WARN_ON_ONCE((*readstate >> RCUTORTURE_RDR_SHIFT_2) > 1))
pr_info("Unexpected idxnew2 value of %#x\n", idxnew2);
}
/* Return the biggest extendables mask given current RCU and boot parameters. */
@@ -1491,7 +1522,7 @@ static int rcutorture_extend_mask_max(void)
WARN_ON_ONCE(extendables & ~RCUTORTURE_MAX_EXTEND);
mask = extendables & RCUTORTURE_MAX_EXTEND & cur_ops->extendables;
mask = mask | RCUTORTURE_RDR_RCU;
mask = mask | RCUTORTURE_RDR_RCU_1 | RCUTORTURE_RDR_RCU_2;
return mask;
}
@@ -1506,13 +1537,21 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp)
unsigned long preempts_irq = preempts | RCUTORTURE_RDR_IRQ;
unsigned long bhs = RCUTORTURE_RDR_BH | RCUTORTURE_RDR_RBH;
WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT);
WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT_1);
/* Mostly only one bit (need preemption!), sometimes lots of bits. */
if (!(randmask1 & 0x7))
mask = mask & randmask2;
else
mask = mask & (1 << (randmask2 % RCUTORTURE_RDR_NBITS));
// Can't have nested RCU reader without outer RCU reader.
if (!(mask & RCUTORTURE_RDR_RCU_1) && (mask & RCUTORTURE_RDR_RCU_2)) {
if (oldmask & RCUTORTURE_RDR_RCU_1)
mask &= ~RCUTORTURE_RDR_RCU_2;
else
mask |= RCUTORTURE_RDR_RCU_1;
}
/*
* Can't enable bh w/irq disabled.
*/
@@ -1532,7 +1571,7 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp)
mask |= oldmask & bhs;
}
return mask ?: RCUTORTURE_RDR_RCU;
return mask ?: RCUTORTURE_RDR_RCU_1;
}
/*
@@ -1626,7 +1665,7 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid)
rcu_torture_writer_state,
cookie, cur_ops->get_gp_state());
rcutorture_one_extend(&readstate, 0, trsp, rtrsp);
WARN_ON_ONCE(readstate & RCUTORTURE_RDR_MASK);
WARN_ON_ONCE(readstate);
// This next splat is expected behavior if leakpointer, especially
// for CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels.
WARN_ON_ONCE(leakpointer && READ_ONCE(p->rtort_pipe_count) > 1);
@@ -2052,6 +2091,8 @@ static int rcu_torture_stall(void *args)
#else
schedule_timeout_uninterruptible(HZ);
#endif
} else if (stall_no_softlockup) {
touch_softlockup_watchdog();
}
if (stall_cpu_irqsoff)
local_irq_enable();
@@ -2123,10 +2164,13 @@ struct rcu_fwd {
unsigned long rcu_fwd_startat;
struct rcu_launder_hist n_launders_hist[N_LAUNDERS_HIST];
unsigned long rcu_launder_gp_seq_start;
int rcu_fwd_id;
};
static DEFINE_MUTEX(rcu_fwd_mutex);
static struct rcu_fwd *rcu_fwds;
static unsigned long rcu_fwd_seq;
static atomic_long_t rcu_fwd_max_cbs;
static bool rcu_fwd_emergency_stop;
static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp)
@@ -2139,8 +2183,9 @@ static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp)
for (i = ARRAY_SIZE(rfp->n_launders_hist) - 1; i > 0; i--)
if (rfp->n_launders_hist[i].n_launders > 0)
break;
pr_alert("%s: Callback-invocation histogram (duration %lu jiffies):",
__func__, jiffies - rfp->rcu_fwd_startat);
mutex_lock(&rcu_fwd_mutex); // Serialize histograms.
pr_alert("%s: Callback-invocation histogram %d (duration %lu jiffies):",
__func__, rfp->rcu_fwd_id, jiffies - rfp->rcu_fwd_startat);
gps_old = rfp->rcu_launder_gp_seq_start;
for (j = 0; j <= i; j++) {
gps = rfp->n_launders_hist[j].launder_gp_seq;
@@ -2151,6 +2196,7 @@ static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp)
gps_old = gps;
}
pr_cont("\n");
mutex_unlock(&rcu_fwd_mutex);
}
/* Callback function for continuous-flood RCU callbacks. */
@@ -2276,7 +2322,8 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp,
cver = READ_ONCE(rcu_torture_current_version) - cver;
gps = rcutorture_seq_diff(cur_ops->get_gp_seq(), gps);
WARN_ON(!cver && gps < 2);
pr_alert("%s: Duration %ld cver %ld gps %ld\n", __func__, dur, cver, gps);
pr_alert("%s: %d Duration %ld cver %ld gps %ld\n", __func__,
rfp->rcu_fwd_id, dur, cver, gps);
}
if (selfpropcb) {
WRITE_ONCE(fcs.stop, 1);
@@ -2344,7 +2391,7 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
rfp->rcu_fwd_cb_head = rfcpn;
n_launders++;
n_launders_sa++;
} else {
} else if (!cur_ops->cbflood_max || cur_ops->cbflood_max > n_max_cbs) {
rfcp = kmalloc(sizeof(*rfcp), GFP_KERNEL);
if (WARN_ON_ONCE(!rfcp)) {
schedule_timeout_interruptible(1);
@@ -2354,8 +2401,11 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
n_launders_sa = 0;
rfcp->rfc_gps = 0;
rfcp->rfc_rfp = rfp;
} else {
rfcp = NULL;
}
cur_ops->call(&rfcp->rh, rcu_torture_fwd_cb_cr);
if (rfcp)
cur_ops->call(&rfcp->rh, rcu_torture_fwd_cb_cr);
rcu_torture_fwd_prog_cond_resched(n_launders + n_max_cbs);
if (tick_nohz_full_enabled()) {
local_irq_save(flags);
@@ -2379,6 +2429,7 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
n_launders + n_max_cbs - n_launders_cb_snap,
n_launders, n_launders_sa,
n_max_gps, n_max_cbs, cver, gps);
atomic_long_add(n_max_cbs, &rcu_fwd_max_cbs);
rcu_torture_fwd_cb_hist(rfp);
}
schedule_timeout_uninterruptible(HZ); /* Let CBs drain. */
@@ -2394,6 +2445,8 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
static int rcutorture_oom_notify(struct notifier_block *self,
unsigned long notused, void *nfreed)
{
int i;
long ncbs;
struct rcu_fwd *rfp;
mutex_lock(&rcu_fwd_mutex);
@@ -2404,18 +2457,26 @@ static int rcutorture_oom_notify(struct notifier_block *self,
}
WARN(1, "%s invoked upon OOM during forward-progress testing.\n",
__func__);
rcu_torture_fwd_cb_hist(rfp);
rcu_fwd_progress_check(1 + (jiffies - READ_ONCE(rfp->rcu_fwd_startat)) / 2);
for (i = 0; i < fwd_progress; i++) {
rcu_torture_fwd_cb_hist(&rfp[i]);
rcu_fwd_progress_check(1 + (jiffies - READ_ONCE(rfp[i].rcu_fwd_startat)) / 2);
}
WRITE_ONCE(rcu_fwd_emergency_stop, true);
smp_mb(); /* Emergency stop before free and wait to avoid hangs. */
pr_info("%s: Freed %lu RCU callbacks.\n",
__func__, rcu_torture_fwd_prog_cbfree(rfp));
ncbs = 0;
for (i = 0; i < fwd_progress; i++)
ncbs += rcu_torture_fwd_prog_cbfree(&rfp[i]);
pr_info("%s: Freed %lu RCU callbacks.\n", __func__, ncbs);
rcu_barrier();
pr_info("%s: Freed %lu RCU callbacks.\n",
__func__, rcu_torture_fwd_prog_cbfree(rfp));
ncbs = 0;
for (i = 0; i < fwd_progress; i++)
ncbs += rcu_torture_fwd_prog_cbfree(&rfp[i]);
pr_info("%s: Freed %lu RCU callbacks.\n", __func__, ncbs);
rcu_barrier();
pr_info("%s: Freed %lu RCU callbacks.\n",
__func__, rcu_torture_fwd_prog_cbfree(rfp));
ncbs = 0;
for (i = 0; i < fwd_progress; i++)
ncbs += rcu_torture_fwd_prog_cbfree(&rfp[i]);
pr_info("%s: Freed %lu RCU callbacks.\n", __func__, ncbs);
smp_mb(); /* Frees before return to avoid redoing OOM. */
(*(unsigned long *)nfreed)++; /* Forward progress CBs freed! */
pr_info("%s returning after OOM processing.\n", __func__);
@@ -2430,7 +2491,10 @@ static struct notifier_block rcutorture_oom_nb = {
/* Carry out grace-period forward-progress testing. */
static int rcu_torture_fwd_prog(void *args)
{
bool firsttime = true;
long max_cbs;
int oldnice = task_nice(current);
unsigned long oldseq = READ_ONCE(rcu_fwd_seq);
struct rcu_fwd *rfp = args;
int tested = 0;
int tested_tries = 0;
@@ -2440,21 +2504,38 @@ static int rcu_torture_fwd_prog(void *args)
if (!IS_ENABLED(CONFIG_SMP) || !IS_ENABLED(CONFIG_RCU_BOOST))
set_user_nice(current, MAX_NICE);
do {
schedule_timeout_interruptible(fwd_progress_holdoff * HZ);
WRITE_ONCE(rcu_fwd_emergency_stop, false);
if (!IS_ENABLED(CONFIG_TINY_RCU) ||
rcu_inkernel_boot_has_ended())
rcu_torture_fwd_prog_nr(rfp, &tested, &tested_tries);
if (rcu_inkernel_boot_has_ended())
if (!rfp->rcu_fwd_id) {
schedule_timeout_interruptible(fwd_progress_holdoff * HZ);
WRITE_ONCE(rcu_fwd_emergency_stop, false);
if (!firsttime) {
max_cbs = atomic_long_xchg(&rcu_fwd_max_cbs, 0);
pr_alert("%s n_max_cbs: %ld\n", __func__, max_cbs);
}
firsttime = false;
WRITE_ONCE(rcu_fwd_seq, rcu_fwd_seq + 1);
} else {
while (READ_ONCE(rcu_fwd_seq) == oldseq)
schedule_timeout_interruptible(1);
oldseq = READ_ONCE(rcu_fwd_seq);
}
pr_alert("%s: Starting forward-progress test %d\n", __func__, rfp->rcu_fwd_id);
if (rcu_inkernel_boot_has_ended() && torture_num_online_cpus() > rfp->rcu_fwd_id)
rcu_torture_fwd_prog_cr(rfp);
if ((cur_ops->stall_dur && cur_ops->stall_dur() > 0) &&
(!IS_ENABLED(CONFIG_TINY_RCU) ||
(rcu_inkernel_boot_has_ended() &&
torture_num_online_cpus() > rfp->rcu_fwd_id)))
rcu_torture_fwd_prog_nr(rfp, &tested, &tested_tries);
/* Avoid slow periods, better to test when busy. */
if (stutter_wait("rcu_torture_fwd_prog"))
sched_set_normal(current, oldnice);
} while (!torture_must_stop());
/* Short runs might not contain a valid forward-progress attempt. */
WARN_ON(!tested && tested_tries >= 5);
pr_alert("%s: tested %d tested_tries %d\n", __func__, tested, tested_tries);
if (!rfp->rcu_fwd_id) {
WARN_ON(!tested && tested_tries >= 5);
pr_alert("%s: tested %d tested_tries %d\n", __func__, tested, tested_tries);
}
torture_kthread_stopping("rcu_torture_fwd_prog");
return 0;
}
@@ -2462,17 +2543,28 @@ static int rcu_torture_fwd_prog(void *args)
/* If forward-progress checking is requested and feasible, spawn the thread. */
static int __init rcu_torture_fwd_prog_init(void)
{
int i;
int ret = 0;
struct rcu_fwd *rfp;
if (!fwd_progress)
return 0; /* Not requested, so don't do it. */
if (fwd_progress >= nr_cpu_ids) {
VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Limiting fwd_progress to # CPUs.\n");
fwd_progress = nr_cpu_ids;
} else if (fwd_progress < 0) {
fwd_progress = nr_cpu_ids;
}
if ((!cur_ops->sync && !cur_ops->call) ||
!cur_ops->stall_dur || cur_ops->stall_dur() <= 0 || cur_ops == &rcu_busted_ops) {
(!cur_ops->cbflood_max && (!cur_ops->stall_dur || cur_ops->stall_dur() <= 0)) ||
cur_ops == &rcu_busted_ops) {
VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Disabled, unsupported by RCU flavor under test");
fwd_progress = 0;
return 0;
}
if (stall_cpu > 0) {
VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Disabled, conflicts with CPU-stall testing");
fwd_progress = 0;
if (IS_MODULE(CONFIG_RCU_TORTURE_TEST))
return -EINVAL; /* In module, can fail back to user. */
WARN_ON(1); /* Make sure rcutorture notices conflict. */
@@ -2482,29 +2574,51 @@ static int __init rcu_torture_fwd_prog_init(void)
fwd_progress_holdoff = 1;
if (fwd_progress_div <= 0)
fwd_progress_div = 4;
rfp = kzalloc(sizeof(*rfp), GFP_KERNEL);
if (!rfp)
rfp = kcalloc(fwd_progress, sizeof(*rfp), GFP_KERNEL);
fwd_prog_tasks = kcalloc(fwd_progress, sizeof(*fwd_prog_tasks), GFP_KERNEL);
if (!rfp || !fwd_prog_tasks) {
kfree(rfp);
kfree(fwd_prog_tasks);
fwd_prog_tasks = NULL;
fwd_progress = 0;
return -ENOMEM;
spin_lock_init(&rfp->rcu_fwd_lock);
rfp->rcu_fwd_cb_tail = &rfp->rcu_fwd_cb_head;
}
for (i = 0; i < fwd_progress; i++) {
spin_lock_init(&rfp[i].rcu_fwd_lock);
rfp[i].rcu_fwd_cb_tail = &rfp[i].rcu_fwd_cb_head;
rfp[i].rcu_fwd_id = i;
}
mutex_lock(&rcu_fwd_mutex);
rcu_fwds = rfp;
mutex_unlock(&rcu_fwd_mutex);
register_oom_notifier(&rcutorture_oom_nb);
return torture_create_kthread(rcu_torture_fwd_prog, rfp, fwd_prog_task);
for (i = 0; i < fwd_progress; i++) {
ret = torture_create_kthread(rcu_torture_fwd_prog, &rcu_fwds[i], fwd_prog_tasks[i]);
if (ret) {
fwd_progress = i;
return ret;
}
}
return 0;
}
static void rcu_torture_fwd_prog_cleanup(void)
{
int i;
struct rcu_fwd *rfp;
torture_stop_kthread(rcu_torture_fwd_prog, fwd_prog_task);
rfp = rcu_fwds;
if (!rcu_fwds || !fwd_prog_tasks)
return;
for (i = 0; i < fwd_progress; i++)
torture_stop_kthread(rcu_torture_fwd_prog, fwd_prog_tasks[i]);
unregister_oom_notifier(&rcutorture_oom_nb);
mutex_lock(&rcu_fwd_mutex);
rfp = rcu_fwds;
rcu_fwds = NULL;
mutex_unlock(&rcu_fwd_mutex);
unregister_oom_notifier(&rcutorture_oom_nb);
kfree(rfp);
kfree(fwd_prog_tasks);
fwd_prog_tasks = NULL;
}
/* Callback function for RCU barrier testing. */
@@ -2741,7 +2855,7 @@ static int rcu_torture_read_exit(void *unused)
&trs, "%s",
"rcu_torture_read_exit_child");
if (IS_ERR(tsp)) {
VERBOSE_TOROUT_ERRSTRING("out of memory");
TOROUT_ERRSTRING("out of memory");
errexit = true;
tsp = NULL;
break;
@@ -3068,7 +3182,7 @@ rcu_torture_init(void)
sizeof(fakewriter_tasks[0]),
GFP_KERNEL);
if (fakewriter_tasks == NULL) {
VERBOSE_TOROUT_ERRSTRING("out of memory");
TOROUT_ERRSTRING("out of memory");
firsterr = -ENOMEM;
goto unwind;
}
@@ -3084,7 +3198,7 @@ rcu_torture_init(void)
rcu_torture_reader_mbchk = kcalloc(nrealreaders, sizeof(*rcu_torture_reader_mbchk),
GFP_KERNEL);
if (!reader_tasks || !rcu_torture_reader_mbchk) {
VERBOSE_TOROUT_ERRSTRING("out of memory");
TOROUT_ERRSTRING("out of memory");
firsterr = -ENOMEM;
goto unwind;
}
@@ -3103,7 +3217,7 @@ rcu_torture_init(void)
if (nrealnocbers > 0) {
nocb_tasks = kcalloc(nrealnocbers, sizeof(nocb_tasks[0]), GFP_KERNEL);
if (nocb_tasks == NULL) {
VERBOSE_TOROUT_ERRSTRING("out of memory");
TOROUT_ERRSTRING("out of memory");
firsterr = -ENOMEM;
goto unwind;
}

View File

@@ -44,7 +44,10 @@
pr_alert("%s" SCALE_FLAG s, scale_type, ## x)
#define VERBOSE_SCALEOUT(s, x...) \
do { if (verbose) pr_alert("%s" SCALE_FLAG s, scale_type, ## x); } while (0)
do { \
if (verbose) \
pr_alert("%s" SCALE_FLAG s "\n", scale_type, ## x); \
} while (0)
static atomic_t verbose_batch_ctr;
@@ -54,12 +57,11 @@ do { \
(verbose_batched <= 0 || \
!(atomic_inc_return(&verbose_batch_ctr) % verbose_batched))) { \
schedule_timeout_uninterruptible(1); \
pr_alert("%s" SCALE_FLAG s, scale_type, ## x); \
pr_alert("%s" SCALE_FLAG s "\n", scale_type, ## x); \
} \
} while (0)
#define VERBOSE_SCALEOUT_ERRSTRING(s, x...) \
do { if (verbose) pr_alert("%s" SCALE_FLAG "!!! " s, scale_type, ## x); } while (0)
#define SCALEOUT_ERRSTRING(s, x...) pr_alert("%s" SCALE_FLAG "!!! " s "\n", scale_type, ## x)
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Joel Fernandes (Google) <joel@joelfernandes.org>");
@@ -604,7 +606,7 @@ static u64 process_durations(int n)
char *buf;
u64 sum = 0;
buf = kmalloc(128 + nreaders * 32, GFP_KERNEL);
buf = kmalloc(800 + 64, GFP_KERNEL);
if (!buf)
return 0;
buf[0] = 0;
@@ -617,13 +619,15 @@ static u64 process_durations(int n)
if (i % 5 == 0)
strcat(buf, "\n");
if (strlen(buf) >= 800) {
pr_alert("%s", buf);
buf[0] = 0;
}
strcat(buf, buf1);
sum += rt->last_duration_ns;
}
strcat(buf, "\n");
SCALEOUT("%s\n", buf);
pr_alert("%s\n", buf);
kfree(buf);
return sum;
@@ -637,7 +641,6 @@ static u64 process_durations(int n)
// point all the timestamps are printed.
static int main_func(void *arg)
{
bool errexit = false;
int exp, r;
char buf1[64];
char *buf;
@@ -648,10 +651,10 @@ static int main_func(void *arg)
VERBOSE_SCALEOUT("main_func task started");
result_avg = kzalloc(nruns * sizeof(*result_avg), GFP_KERNEL);
buf = kzalloc(64 + nruns * 32, GFP_KERNEL);
buf = kzalloc(800 + 64, GFP_KERNEL);
if (!result_avg || !buf) {
VERBOSE_SCALEOUT_ERRSTRING("out of memory");
errexit = true;
SCALEOUT_ERRSTRING("out of memory");
goto oom_exit;
}
if (holdoff)
schedule_timeout_interruptible(holdoff * HZ);
@@ -663,8 +666,6 @@ static int main_func(void *arg)
// Start exp readers up per experiment
for (exp = 0; exp < nruns && !torture_must_stop(); exp++) {
if (errexit)
break;
if (torture_must_stop())
goto end;
@@ -698,26 +699,23 @@ static int main_func(void *arg)
// Print the average of all experiments
SCALEOUT("END OF TEST. Calculating average duration per loop (nanoseconds)...\n");
if (!errexit) {
buf[0] = 0;
strcat(buf, "\n");
strcat(buf, "Runs\tTime(ns)\n");
}
pr_alert("Runs\tTime(ns)\n");
for (exp = 0; exp < nruns; exp++) {
u64 avg;
u32 rem;
if (errexit)
break;
avg = div_u64_rem(result_avg[exp], 1000, &rem);
sprintf(buf1, "%d\t%llu.%03u\n", exp + 1, avg, rem);
strcat(buf, buf1);
if (strlen(buf) >= 800) {
pr_alert("%s", buf);
buf[0] = 0;
}
}
if (!errexit)
SCALEOUT("%s", buf);
pr_alert("%s", buf);
oom_exit:
// This will shutdown everything including us.
if (shutdown) {
shutdown_start = 1;
@@ -841,12 +839,12 @@ ref_scale_init(void)
reader_tasks = kcalloc(nreaders, sizeof(reader_tasks[0]),
GFP_KERNEL);
if (!reader_tasks) {
VERBOSE_SCALEOUT_ERRSTRING("out of memory");
SCALEOUT_ERRSTRING("out of memory");
firsterr = -ENOMEM;
goto unwind;
}
VERBOSE_SCALEOUT("Starting %d reader threads\n", nreaders);
VERBOSE_SCALEOUT("Starting %d reader threads", nreaders);
for (i = 0; i < nreaders; i++) {
firsterr = torture_create_kthread(ref_scale_reader, (void *)i,

View File

@@ -99,7 +99,7 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
int newval = READ_ONCE(ssp->srcu_lock_nesting[idx]) - 1;
WRITE_ONCE(ssp->srcu_lock_nesting[idx], newval);
if (!newval && READ_ONCE(ssp->srcu_gp_waiting))
if (!newval && READ_ONCE(ssp->srcu_gp_waiting) && in_task())
swake_up_one(&ssp->srcu_wq);
}
EXPORT_SYMBOL_GPL(__srcu_read_unlock);

View File

@@ -6,6 +6,7 @@
*/
#ifdef CONFIG_TASKS_RCU_GENERIC
#include "rcu_segcblist.h"
////////////////////////////////////////////////////////////////////////
//
@@ -19,12 +20,34 @@ typedef void (*postscan_func_t)(struct list_head *hop);
typedef void (*holdouts_func_t)(struct list_head *hop, bool ndrpt, bool *frptp);
typedef void (*postgp_func_t)(struct rcu_tasks *rtp);
/**
* struct rcu_tasks_percpu - Per-CPU component of definition for a Tasks-RCU-like mechanism.
* @cblist: Callback list.
* @lock: Lock protecting per-CPU callback list.
* @rtp_jiffies: Jiffies counter value for statistics.
* @rtp_n_lock_retries: Rough lock-contention statistic.
* @rtp_work: Work queue for invoking callbacks.
* @rtp_irq_work: IRQ work queue for deferred wakeups.
* @barrier_q_head: RCU callback for barrier operation.
* @cpu: CPU number corresponding to this entry.
* @rtpp: Pointer to the rcu_tasks structure.
*/
struct rcu_tasks_percpu {
struct rcu_segcblist cblist;
raw_spinlock_t __private lock;
unsigned long rtp_jiffies;
unsigned long rtp_n_lock_retries;
struct work_struct rtp_work;
struct irq_work rtp_irq_work;
struct rcu_head barrier_q_head;
int cpu;
struct rcu_tasks *rtpp;
};
/**
* struct rcu_tasks - Definition for a Tasks-RCU-like mechanism.
* @cbs_head: Head of callback list.
* @cbs_tail: Tail pointer for callback list.
* @cbs_wq: Wait queue allowing new callback to get kthread's attention.
* @cbs_lock: Lock protecting callback list.
* @cbs_gbl_lock: Lock protecting callback list.
* @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
* @gp_func: This flavor's grace-period-wait function.
* @gp_state: Grace period's most recent state transition (debugging).
@@ -32,7 +55,7 @@ typedef void (*postgp_func_t)(struct rcu_tasks *rtp);
* @init_fract: Initial backoff sleep interval.
* @gp_jiffies: Time of last @gp_state transition.
* @gp_start: Most recent grace-period start in jiffies.
* @n_gps: Number of grace periods completed since boot.
* @tasks_gp_seq: Number of grace periods completed since boot.
* @n_ipis: Number of IPIs sent to encourage grace periods to end.
* @n_ipis_fails: Number of IPI-send failures.
* @pregp_func: This flavor's pre-grace-period function (optional).
@@ -41,20 +64,27 @@ typedef void (*postgp_func_t)(struct rcu_tasks *rtp);
* @holdouts_func: This flavor's holdout-list scan function (optional).
* @postgp_func: This flavor's post-grace-period function (optional).
* @call_func: This flavor's call_rcu()-equivalent function.
* @rtpcpu: This flavor's rcu_tasks_percpu structure.
* @percpu_enqueue_shift: Shift down CPU ID this much when enqueuing callbacks.
* @percpu_enqueue_lim: Number of per-CPU callback queues in use for enqueuing.
* @percpu_dequeue_lim: Number of per-CPU callback queues in use for dequeuing.
* @percpu_dequeue_gpseq: RCU grace-period number to propagate enqueue limit to dequeuers.
* @barrier_q_mutex: Serialize barrier operations.
* @barrier_q_count: Number of queues being waited on.
* @barrier_q_completion: Barrier wait/wakeup mechanism.
* @barrier_q_seq: Sequence number for barrier operations.
* @name: This flavor's textual name.
* @kname: This flavor's kthread name.
*/
struct rcu_tasks {
struct rcu_head *cbs_head;
struct rcu_head **cbs_tail;
struct wait_queue_head cbs_wq;
raw_spinlock_t cbs_lock;
raw_spinlock_t cbs_gbl_lock;
int gp_state;
int gp_sleep;
int init_fract;
unsigned long gp_jiffies;
unsigned long gp_start;
unsigned long n_gps;
unsigned long tasks_gp_seq;
unsigned long n_ipis;
unsigned long n_ipis_fails;
struct task_struct *kthread_ptr;
@@ -65,20 +95,40 @@ struct rcu_tasks {
holdouts_func_t holdouts_func;
postgp_func_t postgp_func;
call_rcu_func_t call_func;
struct rcu_tasks_percpu __percpu *rtpcpu;
int percpu_enqueue_shift;
int percpu_enqueue_lim;
int percpu_dequeue_lim;
unsigned long percpu_dequeue_gpseq;
struct mutex barrier_q_mutex;
atomic_t barrier_q_count;
struct completion barrier_q_completion;
unsigned long barrier_q_seq;
char *name;
char *kname;
};
#define DEFINE_RCU_TASKS(rt_name, gp, call, n) \
static struct rcu_tasks rt_name = \
{ \
.cbs_tail = &rt_name.cbs_head, \
.cbs_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rt_name.cbs_wq), \
.cbs_lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name.cbs_lock), \
.gp_func = gp, \
.call_func = call, \
.name = n, \
.kname = #rt_name, \
static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp);
#define DEFINE_RCU_TASKS(rt_name, gp, call, n) \
static DEFINE_PER_CPU(struct rcu_tasks_percpu, rt_name ## __percpu) = { \
.lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name ## __percpu.cbs_pcpu_lock), \
.rtp_irq_work = IRQ_WORK_INIT(call_rcu_tasks_iw_wakeup), \
}; \
static struct rcu_tasks rt_name = \
{ \
.cbs_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rt_name.cbs_wq), \
.cbs_gbl_lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name.cbs_gbl_lock), \
.gp_func = gp, \
.call_func = call, \
.rtpcpu = &rt_name ## __percpu, \
.name = n, \
.percpu_enqueue_shift = ilog2(CONFIG_NR_CPUS), \
.percpu_enqueue_lim = 1, \
.percpu_dequeue_lim = 1, \
.barrier_q_mutex = __MUTEX_INITIALIZER(rt_name.barrier_q_mutex), \
.barrier_q_seq = (0UL - 50UL) << RCU_SEQ_CTR_SHIFT, \
.kname = #rt_name, \
}
/* Track exiting tasks in order to allow them to be waited for. */
@@ -94,6 +144,15 @@ module_param(rcu_task_ipi_delay, int, 0644);
static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT;
module_param(rcu_task_stall_timeout, int, 0644);
static int rcu_task_enqueue_lim __read_mostly = -1;
module_param(rcu_task_enqueue_lim, int, 0444);
static bool rcu_task_cb_adjust;
static int rcu_task_contend_lim __read_mostly = 100;
module_param(rcu_task_contend_lim, int, 0444);
static int rcu_task_collapse_lim __read_mostly = 10;
module_param(rcu_task_collapse_lim, int, 0444);
/* RCU tasks grace-period state for debugging. */
#define RTGS_INIT 0
#define RTGS_WAIT_WAIT_CBS 1
@@ -128,6 +187,8 @@ static const char * const rcu_tasks_gp_state_names[] = {
//
// Generic code.
static void rcu_tasks_invoke_cbs_wq(struct work_struct *wp);
/* Record grace-period phase and time. */
static void set_tasks_gp_state(struct rcu_tasks *rtp, int newstate)
{
@@ -148,23 +209,106 @@ static const char *tasks_gp_state_getname(struct rcu_tasks *rtp)
}
#endif /* #ifndef CONFIG_TINY_RCU */
// Initialize per-CPU callback lists for the specified flavor of
// Tasks RCU.
static void cblist_init_generic(struct rcu_tasks *rtp)
{
int cpu;
unsigned long flags;
int lim;
raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
if (rcu_task_enqueue_lim < 0) {
rcu_task_enqueue_lim = 1;
rcu_task_cb_adjust = true;
pr_info("%s: Setting adjustable number of callback queues.\n", __func__);
} else if (rcu_task_enqueue_lim == 0) {
rcu_task_enqueue_lim = 1;
}
lim = rcu_task_enqueue_lim;
if (lim > nr_cpu_ids)
lim = nr_cpu_ids;
WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids / lim));
WRITE_ONCE(rtp->percpu_dequeue_lim, lim);
smp_store_release(&rtp->percpu_enqueue_lim, lim);
for_each_possible_cpu(cpu) {
struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
WARN_ON_ONCE(!rtpcp);
if (cpu)
raw_spin_lock_init(&ACCESS_PRIVATE(rtpcp, lock));
raw_spin_lock_rcu_node(rtpcp); // irqs already disabled.
if (rcu_segcblist_empty(&rtpcp->cblist))
rcu_segcblist_init(&rtpcp->cblist);
INIT_WORK(&rtpcp->rtp_work, rcu_tasks_invoke_cbs_wq);
rtpcp->cpu = cpu;
rtpcp->rtpp = rtp;
raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled.
}
raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim));
}
// IRQ-work handler that does deferred wakeup for call_rcu_tasks_generic().
static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp)
{
struct rcu_tasks *rtp;
struct rcu_tasks_percpu *rtpcp = container_of(iwp, struct rcu_tasks_percpu, rtp_irq_work);
rtp = rtpcp->rtpp;
wake_up(&rtp->cbs_wq);
}
// Enqueue a callback for the specified flavor of Tasks RCU.
static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
struct rcu_tasks *rtp)
{
unsigned long flags;
unsigned long j;
bool needadjust = false;
bool needwake;
struct rcu_tasks_percpu *rtpcp;
rhp->next = NULL;
rhp->func = func;
raw_spin_lock_irqsave(&rtp->cbs_lock, flags);
needwake = !rtp->cbs_head;
WRITE_ONCE(*rtp->cbs_tail, rhp);
rtp->cbs_tail = &rhp->next;
raw_spin_unlock_irqrestore(&rtp->cbs_lock, flags);
local_irq_save(flags);
rcu_read_lock();
rtpcp = per_cpu_ptr(rtp->rtpcpu,
smp_processor_id() >> READ_ONCE(rtp->percpu_enqueue_shift));
if (!raw_spin_trylock_rcu_node(rtpcp)) { // irqs already disabled.
raw_spin_lock_rcu_node(rtpcp); // irqs already disabled.
j = jiffies;
if (rtpcp->rtp_jiffies != j) {
rtpcp->rtp_jiffies = j;
rtpcp->rtp_n_lock_retries = 0;
}
if (rcu_task_cb_adjust && ++rtpcp->rtp_n_lock_retries > rcu_task_contend_lim &&
READ_ONCE(rtp->percpu_enqueue_lim) != nr_cpu_ids)
needadjust = true; // Defer adjustment to avoid deadlock.
}
if (!rcu_segcblist_is_enabled(&rtpcp->cblist)) {
raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled.
cblist_init_generic(rtp);
raw_spin_lock_rcu_node(rtpcp); // irqs already disabled.
}
needwake = rcu_segcblist_empty(&rtpcp->cblist);
rcu_segcblist_enqueue(&rtpcp->cblist, rhp);
raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
if (unlikely(needadjust)) {
raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
if (rtp->percpu_enqueue_lim != nr_cpu_ids) {
WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids));
WRITE_ONCE(rtp->percpu_dequeue_lim, nr_cpu_ids);
smp_store_release(&rtp->percpu_enqueue_lim, nr_cpu_ids);
pr_info("Switching %s to per-CPU callback queuing.\n", rtp->name);
}
raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
}
rcu_read_unlock();
/* We can't create the thread unless interrupts are enabled. */
if (needwake && READ_ONCE(rtp->kthread_ptr))
wake_up(&rtp->cbs_wq);
irq_work_queue(&rtpcp->rtp_irq_work);
}
// Wait for a grace period for the specified flavor of Tasks RCU.
@@ -178,12 +322,173 @@ static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp)
wait_rcu_gp(rtp->call_func);
}
// RCU callback function for rcu_barrier_tasks_generic().
static void rcu_barrier_tasks_generic_cb(struct rcu_head *rhp)
{
struct rcu_tasks *rtp;
struct rcu_tasks_percpu *rtpcp;
rtpcp = container_of(rhp, struct rcu_tasks_percpu, barrier_q_head);
rtp = rtpcp->rtpp;
if (atomic_dec_and_test(&rtp->barrier_q_count))
complete(&rtp->barrier_q_completion);
}
// Wait for all in-flight callbacks for the specified RCU Tasks flavor.
// Operates in a manner similar to rcu_barrier().
static void rcu_barrier_tasks_generic(struct rcu_tasks *rtp)
{
int cpu;
unsigned long flags;
struct rcu_tasks_percpu *rtpcp;
unsigned long s = rcu_seq_snap(&rtp->barrier_q_seq);
mutex_lock(&rtp->barrier_q_mutex);
if (rcu_seq_done(&rtp->barrier_q_seq, s)) {
smp_mb();
mutex_unlock(&rtp->barrier_q_mutex);
return;
}
rcu_seq_start(&rtp->barrier_q_seq);
init_completion(&rtp->barrier_q_completion);
atomic_set(&rtp->barrier_q_count, 2);
for_each_possible_cpu(cpu) {
if (cpu >= smp_load_acquire(&rtp->percpu_dequeue_lim))
break;
rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
rtpcp->barrier_q_head.func = rcu_barrier_tasks_generic_cb;
raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
if (rcu_segcblist_entrain(&rtpcp->cblist, &rtpcp->barrier_q_head))
atomic_inc(&rtp->barrier_q_count);
raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
}
if (atomic_sub_and_test(2, &rtp->barrier_q_count))
complete(&rtp->barrier_q_completion);
wait_for_completion(&rtp->barrier_q_completion);
rcu_seq_end(&rtp->barrier_q_seq);
mutex_unlock(&rtp->barrier_q_mutex);
}
// Advance callbacks and indicate whether either a grace period or
// callback invocation is needed.
static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp)
{
int cpu;
unsigned long flags;
long n;
long ncbs = 0;
long ncbsnz = 0;
int needgpcb = 0;
for (cpu = 0; cpu < smp_load_acquire(&rtp->percpu_dequeue_lim); cpu++) {
struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
/* Advance and accelerate any new callbacks. */
if (!rcu_segcblist_n_cbs(&rtpcp->cblist))
continue;
raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
// Should we shrink down to a single callback queue?
n = rcu_segcblist_n_cbs(&rtpcp->cblist);
if (n) {
ncbs += n;
if (cpu > 0)
ncbsnz += n;
}
rcu_segcblist_advance(&rtpcp->cblist, rcu_seq_current(&rtp->tasks_gp_seq));
(void)rcu_segcblist_accelerate(&rtpcp->cblist, rcu_seq_snap(&rtp->tasks_gp_seq));
if (rcu_segcblist_pend_cbs(&rtpcp->cblist))
needgpcb |= 0x3;
if (!rcu_segcblist_empty(&rtpcp->cblist))
needgpcb |= 0x1;
raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
}
// Shrink down to a single callback queue if appropriate.
// This is done in two stages: (1) If there are no more than
// rcu_task_collapse_lim callbacks on CPU 0 and none on any other
// CPU, limit enqueueing to CPU 0. (2) After an RCU grace period,
// if there has not been an increase in callbacks, limit dequeuing
// to CPU 0. Note the matching RCU read-side critical section in
// call_rcu_tasks_generic().
if (rcu_task_cb_adjust && ncbs <= rcu_task_collapse_lim) {
raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
if (rtp->percpu_enqueue_lim > 1) {
WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids));
smp_store_release(&rtp->percpu_enqueue_lim, 1);
rtp->percpu_dequeue_gpseq = get_state_synchronize_rcu();
pr_info("Starting switch %s to CPU-0 callback queuing.\n", rtp->name);
}
raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
}
if (rcu_task_cb_adjust && !ncbsnz &&
poll_state_synchronize_rcu(rtp->percpu_dequeue_gpseq)) {
raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
if (rtp->percpu_enqueue_lim < rtp->percpu_dequeue_lim) {
WRITE_ONCE(rtp->percpu_dequeue_lim, 1);
pr_info("Completing switch %s to CPU-0 callback queuing.\n", rtp->name);
}
raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
}
return needgpcb;
}
// Advance callbacks and invoke any that are ready.
static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu *rtpcp)
{
int cpu;
int cpunext;
unsigned long flags;
int len;
struct rcu_head *rhp;
struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
struct rcu_tasks_percpu *rtpcp_next;
cpu = rtpcp->cpu;
cpunext = cpu * 2 + 1;
if (cpunext < smp_load_acquire(&rtp->percpu_dequeue_lim)) {
rtpcp_next = per_cpu_ptr(rtp->rtpcpu, cpunext);
queue_work_on(cpunext, system_wq, &rtpcp_next->rtp_work);
cpunext++;
if (cpunext < smp_load_acquire(&rtp->percpu_dequeue_lim)) {
rtpcp_next = per_cpu_ptr(rtp->rtpcpu, cpunext);
queue_work_on(cpunext, system_wq, &rtpcp_next->rtp_work);
}
}
if (rcu_segcblist_empty(&rtpcp->cblist))
return;
raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
rcu_segcblist_advance(&rtpcp->cblist, rcu_seq_current(&rtp->tasks_gp_seq));
rcu_segcblist_extract_done_cbs(&rtpcp->cblist, &rcl);
raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
len = rcl.len;
for (rhp = rcu_cblist_dequeue(&rcl); rhp; rhp = rcu_cblist_dequeue(&rcl)) {
local_bh_disable();
rhp->func(rhp);
local_bh_enable();
cond_resched();
}
raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
rcu_segcblist_add_len(&rtpcp->cblist, -len);
(void)rcu_segcblist_accelerate(&rtpcp->cblist, rcu_seq_snap(&rtp->tasks_gp_seq));
raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
}
// Workqueue flood to advance callbacks and invoke any that are ready.
static void rcu_tasks_invoke_cbs_wq(struct work_struct *wp)
{
struct rcu_tasks *rtp;
struct rcu_tasks_percpu *rtpcp = container_of(wp, struct rcu_tasks_percpu, rtp_work);
rtp = rtpcp->rtpp;
rcu_tasks_invoke_cbs(rtp, rtpcp);
}
/* RCU-tasks kthread that detects grace periods and invokes callbacks. */
static int __noreturn rcu_tasks_kthread(void *arg)
{
unsigned long flags;
struct rcu_head *list;
struct rcu_head *next;
int needgpcb;
struct rcu_tasks *rtp = arg;
/* Run on housekeeping CPUs by default. Sysadm can move if desired. */
@@ -199,42 +504,22 @@ static int __noreturn rcu_tasks_kthread(void *arg)
for (;;) {
set_tasks_gp_state(rtp, RTGS_WAIT_CBS);
/* Pick up any new callbacks. */
raw_spin_lock_irqsave(&rtp->cbs_lock, flags);
smp_mb__after_spinlock(); // Order updates vs. GP.
list = rtp->cbs_head;
rtp->cbs_head = NULL;
rtp->cbs_tail = &rtp->cbs_head;
raw_spin_unlock_irqrestore(&rtp->cbs_lock, flags);
/* If there were none, wait a bit and start over. */
if (!list) {
wait_event_interruptible(rtp->cbs_wq,
READ_ONCE(rtp->cbs_head));
if (!rtp->cbs_head) {
WARN_ON(signal_pending(current));
set_tasks_gp_state(rtp, RTGS_WAIT_WAIT_CBS);
schedule_timeout_idle(HZ/10);
}
continue;
wait_event_idle(rtp->cbs_wq, (needgpcb = rcu_tasks_need_gpcb(rtp)));
if (needgpcb & 0x2) {
// Wait for one grace period.
set_tasks_gp_state(rtp, RTGS_WAIT_GP);
rtp->gp_start = jiffies;
rcu_seq_start(&rtp->tasks_gp_seq);
rtp->gp_func(rtp);
rcu_seq_end(&rtp->tasks_gp_seq);
}
// Wait for one grace period.
set_tasks_gp_state(rtp, RTGS_WAIT_GP);
rtp->gp_start = jiffies;
rtp->gp_func(rtp);
rtp->n_gps++;
/* Invoke the callbacks. */
/* Invoke callbacks. */
set_tasks_gp_state(rtp, RTGS_INVOKE_CBS);
while (list) {
next = list->next;
local_bh_disable();
list->func(list);
local_bh_enable();
list = next;
cond_resched();
}
rcu_tasks_invoke_cbs(rtp, per_cpu_ptr(rtp->rtpcpu, 0));
/* Paranoid sleep to keep this from entering a tight loop */
schedule_timeout_idle(rtp->gp_sleep);
}
@@ -279,14 +564,15 @@ static void __init rcu_tasks_bootup_oddness(void)
/* Dump out rcutorture-relevant state common to all RCU-tasks flavors. */
static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)
{
struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, 0); // for_each...
pr_info("%s: %s(%d) since %lu g:%lu i:%lu/%lu %c%c %s\n",
rtp->kname,
tasks_gp_state_getname(rtp), data_race(rtp->gp_state),
jiffies - data_race(rtp->gp_jiffies),
data_race(rtp->n_gps),
data_race(rcu_seq_current(&rtp->tasks_gp_seq)),
data_race(rtp->n_ipis_fails), data_race(rtp->n_ipis),
".k"[!!data_race(rtp->kthread_ptr)],
".C"[!!data_race(rtp->cbs_head)],
".C"[!data_race(rcu_segcblist_empty(&rtpcp->cblist))],
s);
}
#endif // #ifndef CONFIG_TINY_RCU
@@ -411,10 +697,10 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
// exit_tasks_rcu_finish() functions begin and end, respectively, the SRCU
// read-side critical sections waited for by rcu_tasks_postscan().
//
// Pre-grace-period update-side code is ordered before the grace via the
// ->cbs_lock and the smp_mb__after_spinlock(). Pre-grace-period read-side
// code is ordered before the grace period via synchronize_rcu() call
// in rcu_tasks_pregp_step() and by the scheduler's locks and interrupt
// Pre-grace-period update-side code is ordered before the grace
// via the raw_spin_lock.*rcu_node(). Pre-grace-period read-side code
// is ordered before the grace period via synchronize_rcu() call in
// rcu_tasks_pregp_step() and by the scheduler's locks and interrupt
// disabling.
/* Pre-grace-period preparation. */
@@ -586,13 +872,13 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_tasks);
*/
void rcu_barrier_tasks(void)
{
/* There is only one callback queue, so this is easy. ;-) */
synchronize_rcu_tasks();
rcu_barrier_tasks_generic(&rcu_tasks);
}
EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
static int __init rcu_spawn_tasks_kthread(void)
{
cblist_init_generic(&rcu_tasks);
rcu_tasks.gp_sleep = HZ / 10;
rcu_tasks.init_fract = HZ / 10;
rcu_tasks.pregp_func = rcu_tasks_pregp_step;
@@ -724,13 +1010,13 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_tasks_rude);
*/
void rcu_barrier_tasks_rude(void)
{
/* There is only one callback queue, so this is easy. ;-) */
synchronize_rcu_tasks_rude();
rcu_barrier_tasks_generic(&rcu_tasks_rude);
}
EXPORT_SYMBOL_GPL(rcu_barrier_tasks_rude);
static int __init rcu_spawn_tasks_rude_kthread(void)
{
cblist_init_generic(&rcu_tasks_rude);
rcu_tasks_rude.gp_sleep = HZ / 10;
rcu_spawn_tasks_kthread_generic(&rcu_tasks_rude);
return 0;
@@ -1073,25 +1359,50 @@ static void rcu_tasks_trace_postscan(struct list_head *hop)
// Any tasks that exit after this point will set ->trc_reader_checked.
}
/* Communicate task state back to the RCU tasks trace stall warning request. */
struct trc_stall_chk_rdr {
int nesting;
int ipi_to_cpu;
u8 needqs;
};
static int trc_check_slow_task(struct task_struct *t, void *arg)
{
struct trc_stall_chk_rdr *trc_rdrp = arg;
if (task_curr(t))
return false; // It is running, so decline to inspect it.
trc_rdrp->nesting = READ_ONCE(t->trc_reader_nesting);
trc_rdrp->ipi_to_cpu = READ_ONCE(t->trc_ipi_to_cpu);
trc_rdrp->needqs = READ_ONCE(t->trc_reader_special.b.need_qs);
return true;
}
/* Show the state of a task stalling the current RCU tasks trace GP. */
static void show_stalled_task_trace(struct task_struct *t, bool *firstreport)
{
int cpu;
struct trc_stall_chk_rdr trc_rdr;
bool is_idle_tsk = is_idle_task(t);
if (*firstreport) {
pr_err("INFO: rcu_tasks_trace detected stalls on tasks:\n");
*firstreport = false;
}
// FIXME: This should attempt to use try_invoke_on_nonrunning_task().
cpu = task_cpu(t);
pr_alert("P%d: %c%c%c nesting: %d%c cpu: %d\n",
t->pid,
".I"[READ_ONCE(t->trc_ipi_to_cpu) >= 0],
".i"[is_idle_task(t)],
".N"[cpu >= 0 && tick_nohz_full_cpu(cpu)],
READ_ONCE(t->trc_reader_nesting),
" N"[!!READ_ONCE(t->trc_reader_special.b.need_qs)],
cpu);
if (!task_call_func(t, trc_check_slow_task, &trc_rdr))
pr_alert("P%d: %c\n",
t->pid,
".i"[is_idle_tsk]);
else
pr_alert("P%d: %c%c%c nesting: %d%c cpu: %d\n",
t->pid,
".I"[trc_rdr.ipi_to_cpu >= 0],
".i"[is_idle_tsk],
".N"[cpu >= 0 && tick_nohz_full_cpu(cpu)],
trc_rdr.nesting,
" N"[!!trc_rdr.needqs],
cpu);
sched_show_task(t);
}
@@ -1121,7 +1432,8 @@ static void check_all_holdout_tasks_trace(struct list_head *hop,
trc_wait_for_one_reader(t, hop);
// If check succeeded, remove this task from the list.
if (READ_ONCE(t->trc_reader_checked))
if (smp_load_acquire(&t->trc_ipi_to_cpu) == -1 &&
READ_ONCE(t->trc_reader_checked))
trc_del_holdout(t);
else if (needreport)
show_stalled_task_trace(t, firstreport);
@@ -1156,7 +1468,7 @@ static void rcu_tasks_trace_postgp(struct rcu_tasks *rtp)
// Yes, this assumes that CPUs process IPIs in order. If that ever
// changes, there will need to be a recheck and/or timed wait.
for_each_online_cpu(cpu)
if (smp_load_acquire(per_cpu_ptr(&trc_ipi_to_cpu, cpu)))
if (WARN_ON_ONCE(smp_load_acquire(per_cpu_ptr(&trc_ipi_to_cpu, cpu))))
smp_call_function_single(cpu, rcu_tasks_trace_empty_fn, NULL, 1);
// Remove the safety count.
@@ -1256,13 +1568,13 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_tasks_trace);
*/
void rcu_barrier_tasks_trace(void)
{
/* There is only one callback queue, so this is easy. ;-) */
synchronize_rcu_tasks_trace();
rcu_barrier_tasks_generic(&rcu_tasks_trace);
}
EXPORT_SYMBOL_GPL(rcu_barrier_tasks_trace);
static int __init rcu_spawn_tasks_trace_kthread(void)
{
cblist_init_generic(&rcu_tasks_trace);
if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) {
rcu_tasks_trace.gp_sleep = HZ / 10;
rcu_tasks_trace.init_fract = HZ / 10;

View File

@@ -79,7 +79,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
.dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
.dynticks = ATOMIC_INIT(1),
#ifdef CONFIG_RCU_NOCB_CPU
.cblist.flags = SEGCBLIST_SOFTIRQ_ONLY,
.cblist.flags = SEGCBLIST_RCU_CORE,
#endif
};
static struct rcu_state rcu_state = {
@@ -624,7 +624,6 @@ static noinstr void rcu_eqs_enter(bool user)
instrumentation_begin();
trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, atomic_read(&rdp->dynticks));
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
rcu_prepare_for_idle();
rcu_preempt_deferred_qs(current);
// instrumentation for the noinstr rcu_dynticks_eqs_enter()
@@ -768,9 +767,6 @@ noinstr void rcu_nmi_exit(void)
trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, atomic_read(&rdp->dynticks));
WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
if (!in_nmi())
rcu_prepare_for_idle();
// instrumentation for the noinstr rcu_dynticks_eqs_enter()
instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));
instrumentation_end();
@@ -872,7 +868,6 @@ static void noinstr rcu_eqs_exit(bool user)
// instrumentation for the noinstr rcu_dynticks_eqs_exit()
instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));
rcu_cleanup_after_idle();
trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, atomic_read(&rdp->dynticks));
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
WRITE_ONCE(rdp->dynticks_nesting, 1);
@@ -1014,12 +1009,6 @@ noinstr void rcu_nmi_enter(void)
rcu_dynticks_eqs_exit();
// ... but is watching here.
if (!in_nmi()) {
instrumentation_begin();
rcu_cleanup_after_idle();
instrumentation_end();
}
instrumentation_begin();
// instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs()
instrument_atomic_read(&rdp->dynticks, sizeof(rdp->dynticks));
@@ -1086,6 +1075,24 @@ void rcu_irq_enter_irqson(void)
local_irq_restore(flags);
}
/*
* Check to see if any future non-offloaded RCU-related work will need
* to be done by the current CPU, even if none need be done immediately,
* returning 1 if so. This function is part of the RCU implementation;
* it is -not- an exported member of the RCU API. This is used by
* the idle-entry code to figure out whether it is safe to disable the
* scheduler-clock interrupt.
*
* Just check whether or not this CPU has non-offloaded RCU callbacks
* queued.
*/
int rcu_needs_cpu(u64 basemono, u64 *nextevt)
{
*nextevt = KTIME_MAX;
return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&
!rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data));
}
/*
* If any sort of urgency was applied to the current CPU (for example,
* the scheduler-clock interrupt was enabled on a nohz_full CPU) in order
@@ -1467,7 +1474,7 @@ static void rcu_gp_kthread_wake(void)
{
struct task_struct *t = READ_ONCE(rcu_state.gp_kthread);
if ((current == t && !in_irq() && !in_serving_softirq()) ||
if ((current == t && !in_hardirq() && !in_serving_softirq()) ||
!READ_ONCE(rcu_state.gp_flags) || !t)
return;
WRITE_ONCE(rcu_state.gp_wake_time, jiffies);
@@ -1590,10 +1597,11 @@ static void __maybe_unused rcu_advance_cbs_nowake(struct rcu_node *rnp,
struct rcu_data *rdp)
{
rcu_lockdep_assert_cblist_protected(rdp);
if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) ||
!raw_spin_trylock_rcu_node(rnp))
if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) || !raw_spin_trylock_rcu_node(rnp))
return;
WARN_ON_ONCE(rcu_advance_cbs(rnp, rdp));
// The grace period cannot end while we hold the rcu_node lock.
if (rcu_seq_state(rcu_seq_current(&rnp->gp_seq)))
WARN_ON_ONCE(rcu_advance_cbs(rnp, rdp));
raw_spin_unlock_rcu_node(rnp);
}
@@ -2277,7 +2285,7 @@ rcu_report_qs_rdp(struct rcu_data *rdp)
unsigned long flags;
unsigned long mask;
bool needwake = false;
const bool offloaded = rcu_rdp_is_offloaded(rdp);
bool needacc = false;
struct rcu_node *rnp;
WARN_ON_ONCE(rdp->cpu != smp_processor_id());
@@ -2304,15 +2312,30 @@ rcu_report_qs_rdp(struct rcu_data *rdp)
/*
* This GP can't end until cpu checks in, so all of our
* callbacks can be processed during the next GP.
*
* NOCB kthreads have their own way to deal with that...
*/
if (!offloaded)
if (!rcu_rdp_is_offloaded(rdp)) {
needwake = rcu_accelerate_cbs(rnp, rdp);
} else if (!rcu_segcblist_completely_offloaded(&rdp->cblist)) {
/*
* ...but NOCB kthreads may miss or delay callbacks acceleration
* if in the middle of a (de-)offloading process.
*/
needacc = true;
}
rcu_disable_urgency_upon_qs(rdp);
rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
/* ^^^ Released rnp->lock */
if (needwake)
rcu_gp_kthread_wake();
if (needacc) {
rcu_nocb_lock_irqsave(rdp, flags);
rcu_accelerate_cbs_unlocked(rnp, rdp);
rcu_nocb_unlock_irqrestore(rdp, flags);
}
}
}
@@ -2444,7 +2467,6 @@ static void rcu_do_batch(struct rcu_data *rdp)
int div;
bool __maybe_unused empty;
unsigned long flags;
const bool offloaded = rcu_rdp_is_offloaded(rdp);
struct rcu_head *rhp;
struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
long bl, count = 0;
@@ -2462,18 +2484,17 @@ static void rcu_do_batch(struct rcu_data *rdp)
}
/*
* Extract the list of ready callbacks, disabling to prevent
* Extract the list of ready callbacks, disabling IRQs to prevent
* races with call_rcu() from interrupt handlers. Leave the
* callback counts, as rcu_barrier() needs to be conservative.
*/
local_irq_save(flags);
rcu_nocb_lock(rdp);
rcu_nocb_lock_irqsave(rdp, flags);
WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
pending = rcu_segcblist_n_cbs(&rdp->cblist);
div = READ_ONCE(rcu_divisor);
div = div < 0 ? 7 : div > sizeof(long) * 8 - 2 ? sizeof(long) * 8 - 2 : div;
bl = max(rdp->blimit, pending >> div);
if (unlikely(bl > 100)) {
if (in_serving_softirq() && unlikely(bl > 100)) {
long rrn = READ_ONCE(rcu_resched_ns);
rrn = rrn < NSEC_PER_MSEC ? NSEC_PER_MSEC : rrn > NSEC_PER_SEC ? NSEC_PER_SEC : rrn;
@@ -2482,7 +2503,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
trace_rcu_batch_start(rcu_state.name,
rcu_segcblist_n_cbs(&rdp->cblist), bl);
rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl);
if (offloaded)
if (rcu_rdp_is_offloaded(rdp))
rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist);
trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbDequeued"));
@@ -2510,18 +2531,21 @@ static void rcu_do_batch(struct rcu_data *rdp)
/*
* Stop only if limit reached and CPU has something to do.
*/
if (count >= bl && !offloaded &&
(need_resched() ||
(!is_idle_task(current) && !rcu_is_callbacks_kthread())))
break;
if (unlikely(tlimit)) {
/* only call local_clock() every 32 callbacks */
if (likely((count & 31) || local_clock() < tlimit))
continue;
/* Exceeded the time limit, so leave. */
break;
}
if (!in_serving_softirq()) {
if (in_serving_softirq()) {
if (count >= bl && (need_resched() || !is_idle_task(current)))
break;
/*
* Make sure we don't spend too much time here and deprive other
* softirq vectors of CPU cycles.
*/
if (unlikely(tlimit)) {
/* only call local_clock() every 32 callbacks */
if (likely((count & 31) || local_clock() < tlimit))
continue;
/* Exceeded the time limit, so leave. */
break;
}
} else {
local_bh_enable();
lockdep_assert_irqs_enabled();
cond_resched_tasks_rcu_qs();
@@ -2530,8 +2554,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
}
}
local_irq_save(flags);
rcu_nocb_lock(rdp);
rcu_nocb_lock_irqsave(rdp, flags);
rdp->n_cbs_invoked += count;
trace_rcu_batch_end(rcu_state.name, count, !!rcl.head, need_resched(),
is_idle_task(current), rcu_is_callbacks_kthread());
@@ -2565,9 +2588,6 @@ static void rcu_do_batch(struct rcu_data *rdp)
rcu_nocb_unlock_irqrestore(rdp, flags);
/* Re-invoke RCU core processing if there are callbacks remaining. */
if (!offloaded && rcu_segcblist_ready_cbs(&rdp->cblist))
invoke_rcu_core();
tick_dep_clear_task(current, TICK_DEP_BIT_RCU);
}
@@ -2706,6 +2726,23 @@ static __latent_entropy void rcu_core(void)
unsigned long flags;
struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
struct rcu_node *rnp = rdp->mynode;
/*
* On RT rcu_core() can be preempted when IRQs aren't disabled.
* Therefore this function can race with concurrent NOCB (de-)offloading
* on this CPU and the below condition must be considered volatile.
* However if we race with:
*
* _ Offloading: In the worst case we accelerate or process callbacks
* concurrently with NOCB kthreads. We are guaranteed to
* call rcu_nocb_lock() if that happens.
*
* _ Deoffloading: In the worst case we miss callbacks acceleration or
* processing. This is fine because the early stage
* of deoffloading invokes rcu_core() after setting
* SEGCBLIST_RCU_CORE. So we guarantee that we'll process
* what could have been dismissed without the need to wait
* for the next rcu_pending() check in the next jiffy.
*/
const bool do_batch = !rcu_segcblist_completely_offloaded(&rdp->cblist);
if (cpu_is_offline(smp_processor_id()))
@@ -2714,7 +2751,7 @@ static __latent_entropy void rcu_core(void)
WARN_ON_ONCE(!rdp->beenonline);
/* Report any deferred quiescent states if preemption enabled. */
if (!(preempt_count() & PREEMPT_MASK)) {
if (IS_ENABLED(CONFIG_PREEMPT_COUNT) && (!(preempt_count() & PREEMPT_MASK))) {
rcu_preempt_deferred_qs(current);
} else if (rcu_preempt_need_deferred_qs(current)) {
set_tsk_need_resched(current);
@@ -2737,8 +2774,12 @@ static __latent_entropy void rcu_core(void)
/* If there are callbacks ready, invoke them. */
if (do_batch && rcu_segcblist_ready_cbs(&rdp->cblist) &&
likely(READ_ONCE(rcu_scheduler_fully_active)))
likely(READ_ONCE(rcu_scheduler_fully_active))) {
rcu_do_batch(rdp);
/* Re-invoke RCU core processing if there are callbacks remaining. */
if (rcu_segcblist_ready_cbs(&rdp->cblist))
invoke_rcu_core();
}
/* Do any needed deferred wakeups of rcuo kthreads. */
do_nocb_deferred_wakeup(rdp);
@@ -2982,7 +3023,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
head->func = func;
head->next = NULL;
local_irq_save(flags);
kasan_record_aux_stack(head);
kasan_record_aux_stack_noalloc(head);
rdp = this_cpu_ptr(&rcu_data);
/* Add the callback to our list. */
@@ -3547,7 +3588,7 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
return;
}
kasan_record_aux_stack(ptr);
kasan_record_aux_stack_noalloc(ptr);
success = add_ptr_to_bulk_krc_lock(&krcp, &flags, ptr, !head);
if (!success) {
run_page_cache_worker(krcp);

View File

@@ -157,7 +157,6 @@ struct rcu_data {
bool core_needs_qs; /* Core waits for quiescent state. */
bool beenonline; /* CPU online at least once. */
bool gpwrap; /* Possible ->gp_seq wrap. */
bool exp_deferred_qs; /* This CPU awaiting a deferred QS? */
bool cpu_started; /* RCU watching this onlining CPU. */
struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
unsigned long grpmask; /* Mask to apply to leaf qsmask. */
@@ -189,11 +188,6 @@ struct rcu_data {
bool rcu_urgent_qs; /* GP old need light quiescent state. */
bool rcu_forced_tick; /* Forced tick to provide QS. */
bool rcu_forced_tick_exp; /* ... provide QS to expedited GP. */
#ifdef CONFIG_RCU_FAST_NO_HZ
unsigned long last_accelerate; /* Last jiffy CBs were accelerated. */
unsigned long last_advance_all; /* Last jiffy CBs were all advanced. */
int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
/* 4) rcu_barrier(), OOM callbacks, and expediting. */
struct rcu_head barrier_head;
@@ -227,8 +221,11 @@ struct rcu_data {
struct swait_queue_head nocb_gp_wq; /* For nocb kthreads to sleep on. */
bool nocb_cb_sleep; /* Is the nocb CB thread asleep? */
struct task_struct *nocb_cb_kthread;
struct rcu_data *nocb_next_cb_rdp;
/* Next rcu_data in wakeup chain. */
struct list_head nocb_head_rdp; /*
* Head of rcu_data list in wakeup chain,
* if rdp_gp.
*/
struct list_head nocb_entry_rdp; /* rcu_data node in wakeup chain. */
/* The following fields are used by CB kthread, hence new cacheline. */
struct rcu_data *nocb_gp_rdp ____cacheline_internodealigned_in_smp;
@@ -419,8 +416,6 @@ static bool rcu_is_callbacks_kthread(void);
static void rcu_cpu_kthread_setup(unsigned int cpu);
static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp);
static void __init rcu_spawn_boost_kthreads(void);
static void rcu_cleanup_after_idle(void);
static void rcu_prepare_for_idle(void);
static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
static bool rcu_preempt_need_deferred_qs(struct task_struct *t);
static void rcu_preempt_deferred_qs(struct task_struct *t);
@@ -447,12 +442,16 @@ static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp);
#ifdef CONFIG_RCU_NOCB_CPU
static void __init rcu_organize_nocb_kthreads(void);
#define rcu_nocb_lock_irqsave(rdp, flags) \
do { \
if (!rcu_segcblist_is_offloaded(&(rdp)->cblist)) \
local_irq_save(flags); \
else \
raw_spin_lock_irqsave(&(rdp)->nocb_lock, (flags)); \
/*
* Disable IRQs before checking offloaded state so that local
* locking is safe against concurrent de-offloading.
*/
#define rcu_nocb_lock_irqsave(rdp, flags) \
do { \
local_irq_save(flags); \
if (rcu_segcblist_is_offloaded(&(rdp)->cblist)) \
raw_spin_lock(&(rdp)->nocb_lock); \
} while (0)
#else /* #ifdef CONFIG_RCU_NOCB_CPU */
#define rcu_nocb_lock_irqsave(rdp, flags) local_irq_save(flags)

View File

@@ -255,7 +255,7 @@ static void rcu_report_exp_cpu_mult(struct rcu_node *rnp,
*/
static void rcu_report_exp_rdp(struct rcu_data *rdp)
{
WRITE_ONCE(rdp->exp_deferred_qs, false);
WRITE_ONCE(rdp->cpu_no_qs.b.exp, false);
rcu_report_exp_cpu_mult(rdp->mynode, rdp->grpmask, true);
}
@@ -387,6 +387,7 @@ retry_ipi:
continue;
}
if (get_cpu() == cpu) {
mask_ofl_test |= mask;
put_cpu();
continue;
}
@@ -506,7 +507,10 @@ static void synchronize_rcu_expedited_wait(void)
if (rdp->rcu_forced_tick_exp)
continue;
rdp->rcu_forced_tick_exp = true;
tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
preempt_disable();
if (cpu_online(cpu))
tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
preempt_enable();
}
}
j = READ_ONCE(jiffies_till_first_fqs);
@@ -655,7 +659,7 @@ static void rcu_exp_handler(void *unused)
rcu_dynticks_curr_cpu_in_eqs()) {
rcu_report_exp_rdp(rdp);
} else {
rdp->exp_deferred_qs = true;
WRITE_ONCE(rdp->cpu_no_qs.b.exp, true);
set_tsk_need_resched(t);
set_preempt_need_resched();
}
@@ -677,7 +681,7 @@ static void rcu_exp_handler(void *unused)
if (depth > 0) {
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->expmask & rdp->grpmask) {
rdp->exp_deferred_qs = true;
WRITE_ONCE(rdp->cpu_no_qs.b.exp, true);
t->rcu_read_unlock_special.b.exp_hint = true;
}
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@@ -759,7 +763,7 @@ static void sync_sched_exp_online_cleanup(int cpu)
my_cpu = get_cpu();
/* Quiescent state either not needed or already requested, leave. */
if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
rdp->cpu_no_qs.b.exp) {
READ_ONCE(rdp->cpu_no_qs.b.exp)) {
put_cpu();
return;
}

View File

@@ -60,16 +60,22 @@ static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp)
* Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters.
* If the list is invalid, a warning is emitted and all CPUs are offloaded.
*/
static bool rcu_nocb_is_setup;
static int __init rcu_nocb_setup(char *str)
{
alloc_bootmem_cpumask_var(&rcu_nocb_mask);
if (cpulist_parse(str, rcu_nocb_mask)) {
pr_warn("rcu_nocbs= bad CPU range, all CPUs set\n");
cpumask_setall(rcu_nocb_mask);
if (*str == '=') {
if (cpulist_parse(++str, rcu_nocb_mask)) {
pr_warn("rcu_nocbs= bad CPU range, all CPUs set\n");
cpumask_setall(rcu_nocb_mask);
}
}
rcu_nocb_is_setup = true;
return 1;
}
__setup("rcu_nocbs=", rcu_nocb_setup);
__setup("rcu_nocbs", rcu_nocb_setup);
static int __init parse_rcu_nocb_poll(char *arg)
{
@@ -625,7 +631,21 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
* and the global grace-period kthread are awakened if needed.
*/
WARN_ON_ONCE(my_rdp->nocb_gp_rdp != my_rdp);
for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_cb_rdp) {
/*
* An rcu_data structure is removed from the list after its
* CPU is de-offloaded and added to the list before that CPU is
* (re-)offloaded. If the following loop happens to be referencing
* that rcu_data structure during the time that the corresponding
* CPU is de-offloaded and then immediately re-offloaded, this
* loop's rdp pointer will be carried to the end of the list by
* the resulting pair of list operations. This can cause the loop
* to skip over some of the rcu_data structures that were supposed
* to have been scanned. Fortunately a new iteration through the
* entire loop is forced after a given CPU's rcu_data structure
* is added to the list, so the skipped-over rcu_data structures
* won't be ignored for long.
*/
list_for_each_entry_rcu(rdp, &my_rdp->nocb_head_rdp, nocb_entry_rdp, 1) {
bool needwake_state = false;
if (!nocb_gp_enabled_cb(rdp))
@@ -789,6 +809,18 @@ static void nocb_cb_wait(struct rcu_data *rdp)
bool can_sleep = true;
struct rcu_node *rnp = rdp->mynode;
do {
swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
nocb_cb_wait_cond(rdp));
// VVV Ensure CB invocation follows _sleep test.
if (smp_load_acquire(&rdp->nocb_cb_sleep)) { // ^^^
WARN_ON(signal_pending(current));
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
}
} while (!nocb_cb_can_run(rdp));
local_irq_save(flags);
rcu_momentary_dyntick_idle();
local_irq_restore(flags);
@@ -841,17 +873,6 @@ static void nocb_cb_wait(struct rcu_data *rdp)
if (needwake_state)
swake_up_one(&rdp->nocb_state_wq);
do {
swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
nocb_cb_wait_cond(rdp));
// VVV Ensure CB invocation follows _sleep test.
if (smp_load_acquire(&rdp->nocb_cb_sleep)) { // ^^^
WARN_ON(signal_pending(current));
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
}
} while (!nocb_cb_can_run(rdp));
}
/*
@@ -990,22 +1011,33 @@ static long rcu_nocb_rdp_deoffload(void *arg)
* will refuse to put anything into the bypass.
*/
WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));
/*
* Start with invoking rcu_core() early. This way if the current thread
* happens to preempt an ongoing call to rcu_core() in the middle,
* leaving some work dismissed because rcu_core() still thinks the rdp is
* completely offloaded, we are guaranteed a nearby future instance of
* rcu_core() to catch up.
*/
rcu_segcblist_set_flags(cblist, SEGCBLIST_RCU_CORE);
invoke_rcu_core();
ret = rdp_offload_toggle(rdp, false, flags);
swait_event_exclusive(rdp->nocb_state_wq,
!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB |
SEGCBLIST_KTHREAD_GP));
/* Stop nocb_gp_wait() from iterating over this structure. */
list_del_rcu(&rdp->nocb_entry_rdp);
/*
* Lock one last time to acquire latest callback updates from kthreads
* so we can later handle callbacks locally without locking.
*/
rcu_nocb_lock_irqsave(rdp, flags);
/*
* Theoretically we could set SEGCBLIST_SOFTIRQ_ONLY after the nocb
* Theoretically we could clear SEGCBLIST_LOCKING after the nocb
* lock is released but how about being paranoid for once?
*/
rcu_segcblist_set_flags(cblist, SEGCBLIST_SOFTIRQ_ONLY);
rcu_segcblist_clear_flags(cblist, SEGCBLIST_LOCKING);
/*
* With SEGCBLIST_SOFTIRQ_ONLY, we can't use
* Without SEGCBLIST_LOCKING, we can't use
* rcu_nocb_unlock_irqrestore() anymore.
*/
raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
@@ -1057,15 +1089,26 @@ static long rcu_nocb_rdp_offload(void *arg)
return -EINVAL;
pr_info("Offloading %d\n", rdp->cpu);
/*
* Can't use rcu_nocb_lock_irqsave() while we are in
* SEGCBLIST_SOFTIRQ_ONLY mode.
* Cause future nocb_gp_wait() invocations to iterate over
* structure, resetting ->nocb_gp_sleep and waking up the related
* "rcuog". Since nocb_gp_wait() in turn locks ->nocb_gp_lock
* before setting ->nocb_gp_sleep again, we are guaranteed to
* iterate this newly added structure before "rcuog" goes to
* sleep again.
*/
list_add_tail_rcu(&rdp->nocb_entry_rdp, &rdp->nocb_gp_rdp->nocb_head_rdp);
/*
* Can't use rcu_nocb_lock_irqsave() before SEGCBLIST_LOCKING
* is set.
*/
raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
/*
* We didn't take the nocb lock while working on the
* rdp->cblist in SEGCBLIST_SOFTIRQ_ONLY mode.
* rdp->cblist with SEGCBLIST_LOCKING cleared (pure softirq/rcuc mode).
* Every modifications that have been done previously on
* rdp->cblist must be visible remotely by the nocb kthreads
* upon wake up after reading the cblist flags.
@@ -1084,6 +1127,14 @@ static long rcu_nocb_rdp_offload(void *arg)
rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB) &&
rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP));
/*
* All kthreads are ready to work, we can finally relieve rcu_core() and
* enable nocb bypass.
*/
rcu_nocb_lock_irqsave(rdp, flags);
rcu_segcblist_clear_flags(cblist, SEGCBLIST_RCU_CORE);
rcu_nocb_unlock_irqrestore(rdp, flags);
return ret;
}
@@ -1122,13 +1173,17 @@ void __init rcu_init_nohz(void)
need_rcu_nocb_mask = true;
#endif /* #if defined(CONFIG_NO_HZ_FULL) */
if (!cpumask_available(rcu_nocb_mask) && need_rcu_nocb_mask) {
if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) {
pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n");
return;
if (need_rcu_nocb_mask) {
if (!cpumask_available(rcu_nocb_mask)) {
if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) {
pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n");
return;
}
}
rcu_nocb_is_setup = true;
}
if (!cpumask_available(rcu_nocb_mask))
if (!rcu_nocb_is_setup)
return;
#if defined(CONFIG_NO_HZ_FULL)
@@ -1154,8 +1209,8 @@ void __init rcu_init_nohz(void)
if (rcu_segcblist_empty(&rdp->cblist))
rcu_segcblist_init(&rdp->cblist);
rcu_segcblist_offload(&rdp->cblist, true);
rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_CB);
rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_GP);
rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP);
rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_RCU_CORE);
}
rcu_organize_nocb_kthreads();
}
@@ -1178,17 +1233,17 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
* rcuo CB kthread, spawn it. Additionally, if the rcuo GP kthread
* for this CPU's group has not yet been created, spawn it as well.
*/
static void rcu_spawn_one_nocb_kthread(int cpu)
static void rcu_spawn_cpu_nocb_kthread(int cpu)
{
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
struct rcu_data *rdp_gp;
struct task_struct *t;
/*
* If this isn't a no-CBs CPU or if it already has an rcuo kthread,
* then nothing to do.
*/
if (!rcu_is_nocb_cpu(cpu) || rdp->nocb_cb_kthread)
if (!rcu_scheduler_fully_active || !rcu_nocb_is_setup)
return;
/* If there already is an rcuo kthread, then nothing to do. */
if (rdp->nocb_cb_kthread)
return;
/* If we didn't spawn the GP kthread first, reorganize! */
@@ -1210,16 +1265,6 @@ static void rcu_spawn_one_nocb_kthread(int cpu)
WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread);
}
/*
* If the specified CPU is a no-CBs CPU that does not already have its
* rcuo kthread, spawn it.
*/
static void rcu_spawn_cpu_nocb_kthread(int cpu)
{
if (rcu_scheduler_fully_active)
rcu_spawn_one_nocb_kthread(cpu);
}
/*
* Once the scheduler is running, spawn rcuo kthreads for all online
* no-CBs CPUs. This assumes that the early_initcall()s happen before
@@ -1230,8 +1275,10 @@ static void __init rcu_spawn_nocb_kthreads(void)
{
int cpu;
for_each_online_cpu(cpu)
rcu_spawn_cpu_nocb_kthread(cpu);
if (rcu_nocb_is_setup) {
for_each_online_cpu(cpu)
rcu_spawn_cpu_nocb_kthread(cpu);
}
}
/* How many CB CPU IDs per GP kthread? Default of -1 for sqrt(nr_cpu_ids). */
@@ -1251,7 +1298,6 @@ static void __init rcu_organize_nocb_kthreads(void)
int nl = 0; /* Next GP kthread. */
struct rcu_data *rdp;
struct rcu_data *rdp_gp = NULL; /* Suppress misguided gcc warn. */
struct rcu_data *rdp_prev = NULL;
if (!cpumask_available(rcu_nocb_mask))
return;
@@ -1265,14 +1311,14 @@ static void __init rcu_organize_nocb_kthreads(void)
* Should the corresponding CPU come online in the future, then
* we will spawn the needed set of rcu_nocb_kthread() kthreads.
*/
for_each_cpu(cpu, rcu_nocb_mask) {
for_each_possible_cpu(cpu) {
rdp = per_cpu_ptr(&rcu_data, cpu);
if (rdp->cpu >= nl) {
/* New GP kthread, set up for CBs & next GP. */
gotnocbs = true;
nl = DIV_ROUND_UP(rdp->cpu + 1, ls) * ls;
rdp->nocb_gp_rdp = rdp;
rdp_gp = rdp;
INIT_LIST_HEAD(&rdp->nocb_head_rdp);
if (dump_tree) {
if (!firsttime)
pr_cont("%s\n", gotnocbscbs
@@ -1285,12 +1331,12 @@ static void __init rcu_organize_nocb_kthreads(void)
} else {
/* Another CB kthread, link to previous GP kthread. */
gotnocbscbs = true;
rdp->nocb_gp_rdp = rdp_gp;
rdp_prev->nocb_next_cb_rdp = rdp;
if (dump_tree)
pr_cont(" %d", cpu);
}
rdp_prev = rdp;
rdp->nocb_gp_rdp = rdp_gp;
if (cpumask_test_cpu(cpu, rcu_nocb_mask))
list_add_tail(&rdp->nocb_entry_rdp, &rdp_gp->nocb_head_rdp);
}
if (gotnocbs && dump_tree)
pr_cont("%s\n", gotnocbscbs ? "" : " (self only)");
@@ -1352,6 +1398,7 @@ static void show_rcu_nocb_state(struct rcu_data *rdp)
{
char bufw[20];
char bufr[20];
struct rcu_data *nocb_next_rdp;
struct rcu_segcblist *rsclp = &rdp->cblist;
bool waslocked;
bool wassleep;
@@ -1359,11 +1406,16 @@ static void show_rcu_nocb_state(struct rcu_data *rdp)
if (rdp->nocb_gp_rdp == rdp)
show_rcu_nocb_gp_state(rdp);
nocb_next_rdp = list_next_or_null_rcu(&rdp->nocb_gp_rdp->nocb_head_rdp,
&rdp->nocb_entry_rdp,
typeof(*rdp),
nocb_entry_rdp);
sprintf(bufw, "%ld", rsclp->gp_seq[RCU_WAIT_TAIL]);
sprintf(bufr, "%ld", rsclp->gp_seq[RCU_NEXT_READY_TAIL]);
pr_info(" CB %d^%d->%d %c%c%c%c%c%c F%ld L%ld C%d %c%c%s%c%s%c%c q%ld %c CPU %d%s\n",
rdp->cpu, rdp->nocb_gp_rdp->cpu,
rdp->nocb_next_cb_rdp ? rdp->nocb_next_cb_rdp->cpu : -1,
nocb_next_rdp ? nocb_next_rdp->cpu : -1,
"kK"[!!rdp->nocb_cb_kthread],
"bB"[raw_spin_is_locked(&rdp->nocb_bypass_lock)],
"cC"[!!atomic_read(&rdp->nocb_lock_contended)],

View File

@@ -16,7 +16,7 @@
static bool rcu_rdp_is_offloaded(struct rcu_data *rdp)
{
/*
* In order to read the offloaded state of an rdp is a safe
* In order to read the offloaded state of an rdp in a safe
* and stable way and prevent from its value to be changed
* under us, we must either hold the barrier mutex, the cpu
* hotplug lock (read or write) or the nocb lock. Local
@@ -51,12 +51,10 @@ static void __init rcu_bootup_announce_oddness(void)
RCU_FANOUT);
if (rcu_fanout_exact)
pr_info("\tHierarchical RCU autobalancing is disabled.\n");
if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ))
pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
if (IS_ENABLED(CONFIG_PROVE_RCU))
pr_info("\tRCU lockdep checking is enabled.\n");
if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
pr_info("\tRCU strict (and thus non-scalable) grace periods enabled.\n");
pr_info("\tRCU strict (and thus non-scalable) grace periods are enabled.\n");
if (RCU_NUM_LVLS >= 4)
pr_info("\tFour(or more)-level hierarchy is enabled.\n");
if (RCU_FANOUT_LEAF != 16)
@@ -88,13 +86,13 @@ static void __init rcu_bootup_announce_oddness(void)
if (rcu_kick_kthreads)
pr_info("\tKick kthreads if too-long grace period.\n");
if (IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD))
pr_info("\tRCU callback double-/use-after-free debug enabled.\n");
pr_info("\tRCU callback double-/use-after-free debug is enabled.\n");
if (gp_preinit_delay)
pr_info("\tRCU debug GP pre-init slowdown %d jiffies.\n", gp_preinit_delay);
if (gp_init_delay)
pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
if (gp_cleanup_delay)
pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay);
pr_info("\tRCU debug GP cleanup slowdown %d jiffies.\n", gp_cleanup_delay);
if (!use_softirq)
pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
@@ -260,10 +258,10 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
* no need to check for a subsequent expedited GP. (Though we are
* still in a quiescent state in any case.)
*/
if (blkd_state & RCU_EXP_BLKD && rdp->exp_deferred_qs)
if (blkd_state & RCU_EXP_BLKD && rdp->cpu_no_qs.b.exp)
rcu_report_exp_rdp(rdp);
else
WARN_ON_ONCE(rdp->exp_deferred_qs);
WARN_ON_ONCE(rdp->cpu_no_qs.b.exp);
}
/*
@@ -277,12 +275,16 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
* current task, there might be any number of other tasks blocked while
* in an RCU read-side critical section.
*
* Unlike non-preemptible-RCU, quiescent state reports for expedited
* grace periods are handled separately via deferred quiescent states
* and context switch events.
*
* Callers to this function must disable preemption.
*/
static void rcu_qs(void)
{
RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!\n");
if (__this_cpu_read(rcu_data.cpu_no_qs.s)) {
if (__this_cpu_read(rcu_data.cpu_no_qs.b.norm)) {
trace_rcu_grace_period(TPS("rcu_preempt"),
__this_cpu_read(rcu_data.gp_seq),
TPS("cpuqs"));
@@ -350,7 +352,7 @@ void rcu_note_context_switch(bool preempt)
* means that we continue to block the current grace period.
*/
rcu_qs();
if (rdp->exp_deferred_qs)
if (rdp->cpu_no_qs.b.exp)
rcu_report_exp_rdp(rdp);
rcu_tasks_qs(current, preempt);
trace_rcu_utilization(TPS("End context switch"));
@@ -477,7 +479,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
*/
special = t->rcu_read_unlock_special;
rdp = this_cpu_ptr(&rcu_data);
if (!special.s && !rdp->exp_deferred_qs) {
if (!special.s && !rdp->cpu_no_qs.b.exp) {
local_irq_restore(flags);
return;
}
@@ -497,7 +499,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
* tasks are handled when removing the task from the
* blocked-tasks list below.
*/
if (rdp->exp_deferred_qs)
if (rdp->cpu_no_qs.b.exp)
rcu_report_exp_rdp(rdp);
/* Clean up if blocked during RCU read-side critical section. */
@@ -580,7 +582,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
*/
static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
return (__this_cpu_read(rcu_data.exp_deferred_qs) ||
return (__this_cpu_read(rcu_data.cpu_no_qs.b.exp) ||
READ_ONCE(t->rcu_read_unlock_special.s)) &&
rcu_preempt_depth() == 0;
}
@@ -642,7 +644,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
(IS_ENABLED(CONFIG_RCU_BOOST) && irqs_were_disabled &&
t->rcu_blocked_node);
// Need to defer quiescent state until everything is enabled.
if (use_softirq && (in_irq() || (expboost && !irqs_were_disabled))) {
if (use_softirq && (in_hardirq() || (expboost && !irqs_were_disabled))) {
// Using softirq, safe to awaken, and either the
// wakeup is free or there is either an expedited
// GP in flight or a potential need to deboost.
@@ -845,10 +847,8 @@ static void rcu_qs(void)
trace_rcu_grace_period(TPS("rcu_sched"),
__this_cpu_read(rcu_data.gp_seq), TPS("cpuqs"));
__this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);
if (!__this_cpu_read(rcu_data.cpu_no_qs.b.exp))
return;
__this_cpu_write(rcu_data.cpu_no_qs.b.exp, false);
rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
if (__this_cpu_read(rcu_data.cpu_no_qs.b.exp))
rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
}
/*
@@ -925,7 +925,18 @@ static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
return false;
}
static void rcu_preempt_deferred_qs(struct task_struct *t) { }
// Except that we do need to respond to a request by an expedited grace
// period for a quiescent state from this CPU. Note that requests from
// tasks are handled when removing the task from the blocked-tasks list
// below.
static void rcu_preempt_deferred_qs(struct task_struct *t)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
if (rdp->cpu_no_qs.b.exp)
rcu_report_exp_rdp(rdp);
}
/*
* Because there is no preemptible RCU, there can be no readers blocked,
@@ -1153,7 +1164,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
/*
* Create an RCU-boost kthread for the specified node if one does not
* already exist. We only create this kthread for preemptible RCU.
* Returns zero if all is well, a negated errno otherwise.
*/
static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
{
@@ -1204,8 +1214,9 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
cpu != outgoingcpu)
cpumask_set_cpu(cpu, cm);
cpumask_and(cm, cm, housekeeping_cpumask(HK_FLAG_RCU));
if (cpumask_weight(cm) == 0)
cpumask_setall(cm);
cpumask_copy(cm, housekeeping_cpumask(HK_FLAG_RCU));
set_cpus_allowed_ptr(t, cm);
free_cpumask_var(cm);
}
@@ -1253,201 +1264,6 @@ static void __init rcu_spawn_boost_kthreads(void)
#endif /* #else #ifdef CONFIG_RCU_BOOST */
#if !defined(CONFIG_RCU_FAST_NO_HZ)
/*
* Check to see if any future non-offloaded RCU-related work will need
* to be done by the current CPU, even if none need be done immediately,
* returning 1 if so. This function is part of the RCU implementation;
* it is -not- an exported member of the RCU API.
*
* Because we not have RCU_FAST_NO_HZ, just check whether or not this
* CPU has RCU callbacks queued.
*/
int rcu_needs_cpu(u64 basemono, u64 *nextevt)
{
*nextevt = KTIME_MAX;
return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&
!rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data));
}
/*
* Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
* after it.
*/
static void rcu_cleanup_after_idle(void)
{
}
/*
* Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n,
* is nothing.
*/
static void rcu_prepare_for_idle(void)
{
}
#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
/*
* This code is invoked when a CPU goes idle, at which point we want
* to have the CPU do everything required for RCU so that it can enter
* the energy-efficient dyntick-idle mode.
*
* The following preprocessor symbol controls this:
*
* RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted
* to sleep in dyntick-idle mode with RCU callbacks pending. This
* is sized to be roughly one RCU grace period. Those energy-efficiency
* benchmarkers who might otherwise be tempted to set this to a large
* number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your
* system. And if you are -that- concerned about energy efficiency,
* just power the system down and be done with it!
*
* The value below works well in practice. If future workloads require
* adjustment, they can be converted into kernel config parameters, though
* making the state machine smarter might be a better option.
*/
#define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */
static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY;
module_param(rcu_idle_gp_delay, int, 0644);
/*
* Try to advance callbacks on the current CPU, but only if it has been
* awhile since the last time we did so. Afterwards, if there are any
* callbacks ready for immediate invocation, return true.
*/
static bool __maybe_unused rcu_try_advance_all_cbs(void)
{
bool cbs_ready = false;
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
struct rcu_node *rnp;
/* Exit early if we advanced recently. */
if (jiffies == rdp->last_advance_all)
return false;
rdp->last_advance_all = jiffies;
rnp = rdp->mynode;
/*
* Don't bother checking unless a grace period has
* completed since we last checked and there are
* callbacks not yet ready to invoke.
*/
if ((rcu_seq_completed_gp(rdp->gp_seq,
rcu_seq_current(&rnp->gp_seq)) ||
unlikely(READ_ONCE(rdp->gpwrap))) &&
rcu_segcblist_pend_cbs(&rdp->cblist))
note_gp_changes(rdp);
if (rcu_segcblist_ready_cbs(&rdp->cblist))
cbs_ready = true;
return cbs_ready;
}
/*
* Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
* to invoke. If the CPU has callbacks, try to advance them. Tell the
* caller about what to set the timeout.
*
* The caller must have disabled interrupts.
*/
int rcu_needs_cpu(u64 basemono, u64 *nextevt)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
unsigned long dj;
lockdep_assert_irqs_disabled();
/* If no non-offloaded callbacks, RCU doesn't need the CPU. */
if (rcu_segcblist_empty(&rdp->cblist) ||
rcu_rdp_is_offloaded(rdp)) {
*nextevt = KTIME_MAX;
return 0;
}
/* Attempt to advance callbacks. */
if (rcu_try_advance_all_cbs()) {
/* Some ready to invoke, so initiate later invocation. */
invoke_rcu_core();
return 1;
}
rdp->last_accelerate = jiffies;
/* Request timer and round. */
dj = round_up(rcu_idle_gp_delay + jiffies, rcu_idle_gp_delay) - jiffies;
*nextevt = basemono + dj * TICK_NSEC;
return 0;
}
/*
* Prepare a CPU for idle from an RCU perspective. The first major task is to
* sense whether nohz mode has been enabled or disabled via sysfs. The second
* major task is to accelerate (that is, assign grace-period numbers to) any
* recently arrived callbacks.
*
* The caller must have disabled interrupts.
*/
static void rcu_prepare_for_idle(void)
{
bool needwake;
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
struct rcu_node *rnp;
int tne;
lockdep_assert_irqs_disabled();
if (rcu_rdp_is_offloaded(rdp))
return;
/* Handle nohz enablement switches conservatively. */
tne = READ_ONCE(tick_nohz_active);
if (tne != rdp->tick_nohz_enabled_snap) {
if (!rcu_segcblist_empty(&rdp->cblist))
invoke_rcu_core(); /* force nohz to see update. */
rdp->tick_nohz_enabled_snap = tne;
return;
}
if (!tne)
return;
/*
* If we have not yet accelerated this jiffy, accelerate all
* callbacks on this CPU.
*/
if (rdp->last_accelerate == jiffies)
return;
rdp->last_accelerate = jiffies;
if (rcu_segcblist_pend_cbs(&rdp->cblist)) {
rnp = rdp->mynode;
raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
needwake = rcu_accelerate_cbs(rnp, rdp);
raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
if (needwake)
rcu_gp_kthread_wake();
}
}
/*
* Clean up for exit from idle. Attempt to advance callbacks based on
* any grace periods that elapsed while the CPU was idle, and if any
* callbacks are now ready to invoke, initiate invocation.
*/
static void rcu_cleanup_after_idle(void)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
lockdep_assert_irqs_disabled();
if (rcu_rdp_is_offloaded(rdp))
return;
if (rcu_try_advance_all_cbs())
invoke_rcu_core();
}
#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
/*
* Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the
* grace-period kthread will do force_quiescent_state() processing?
@@ -1455,7 +1271,7 @@ static void rcu_cleanup_after_idle(void)
* CPU unless the grace period has extended for too long.
*
* This code relies on the fact that all NO_HZ_FULL CPUs are also
* CONFIG_RCU_NOCB_CPU CPUs.
* RCU_NOCB_CPU CPUs.
*/
static bool rcu_nohz_full_cpu(void)
{

View File

@@ -347,26 +347,6 @@ static void rcu_dump_cpu_stacks(void)
}
}
#ifdef CONFIG_RCU_FAST_NO_HZ
static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
{
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
sprintf(cp, "last_accelerate: %04lx/%04lx dyntick_enabled: %d",
rdp->last_accelerate & 0xffff, jiffies & 0xffff,
!!rdp->tick_nohz_enabled_snap);
}
#else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
{
*cp = '\0';
}
#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */
static const char * const gp_state_names[] = {
[RCU_GP_IDLE] = "RCU_GP_IDLE",
[RCU_GP_WAIT_GPS] = "RCU_GP_WAIT_GPS",
@@ -408,13 +388,12 @@ static bool rcu_is_gp_kthread_starving(unsigned long *jp)
* of RCU grace periods that this CPU is ignorant of, for example, "1"
* if the CPU was aware of the previous grace period.
*
* Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info.
* Also print out idle info.
*/
static void print_cpu_stall_info(int cpu)
{
unsigned long delta;
bool falsepositive;
char fast_no_hz[72];
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
char *ticks_title;
unsigned long ticks_value;
@@ -432,11 +411,10 @@ static void print_cpu_stall_info(int cpu)
ticks_title = "ticks this GP";
ticks_value = rdp->ticks_this_gp;
}
print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq);
falsepositive = rcu_is_gp_kthread_starving(NULL) &&
rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp));
pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s%s\n",
pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s\n",
cpu,
"O."[!!cpu_online(cpu)],
"o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)],
@@ -449,7 +427,6 @@ static void print_cpu_stall_info(int cpu)
rdp->dynticks_nesting, rdp->dynticks_nmi_nesting,
rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
data_race(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart,
fast_no_hz,
falsepositive ? " (false positive?)" : "");
}

View File

@@ -38,14 +38,10 @@
#define SCFTORT_STRING "scftorture"
#define SCFTORT_FLAG SCFTORT_STRING ": "
#define SCFTORTOUT(s, x...) \
pr_alert(SCFTORT_FLAG s, ## x)
#define VERBOSE_SCFTORTOUT(s, x...) \
do { if (verbose) pr_alert(SCFTORT_FLAG s, ## x); } while (0)
do { if (verbose) pr_alert(SCFTORT_FLAG s "\n", ## x); } while (0)
#define VERBOSE_SCFTORTOUT_ERRSTRING(s, x...) \
do { if (verbose) pr_alert(SCFTORT_FLAG "!!! " s, ## x); } while (0)
#define SCFTORTOUT_ERRSTRING(s, x...) pr_alert(SCFTORT_FLAG "!!! " s "\n", ## x)
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <paulmck@kernel.org>");
@@ -587,14 +583,14 @@ static int __init scf_torture_init(void)
if (weight_resched1 == 0 && weight_single1 == 0 && weight_single_rpc1 == 0 &&
weight_single_wait1 == 0 && weight_many1 == 0 && weight_many_wait1 == 0 &&
weight_all1 == 0 && weight_all_wait1 == 0) {
VERBOSE_SCFTORTOUT_ERRSTRING("all zero weights makes no sense");
SCFTORTOUT_ERRSTRING("all zero weights makes no sense");
firsterr = -EINVAL;
goto unwind;
}
if (IS_BUILTIN(CONFIG_SCF_TORTURE_TEST))
scf_sel_add(weight_resched1, SCF_PRIM_RESCHED, false);
else if (weight_resched1)
VERBOSE_SCFTORTOUT_ERRSTRING("built as module, weight_resched ignored");
SCFTORTOUT_ERRSTRING("built as module, weight_resched ignored");
scf_sel_add(weight_single1, SCF_PRIM_SINGLE, false);
scf_sel_add(weight_single_rpc1, SCF_PRIM_SINGLE_RPC, true);
scf_sel_add(weight_single_wait1, SCF_PRIM_SINGLE, true);
@@ -625,12 +621,12 @@ static int __init scf_torture_init(void)
nthreads = num_online_cpus();
scf_stats_p = kcalloc(nthreads, sizeof(scf_stats_p[0]), GFP_KERNEL);
if (!scf_stats_p) {
VERBOSE_SCFTORTOUT_ERRSTRING("out of memory");
SCFTORTOUT_ERRSTRING("out of memory");
firsterr = -ENOMEM;
goto unwind;
}
VERBOSE_SCFTORTOUT("Starting %d smp_call_function() threads\n", nthreads);
VERBOSE_SCFTORTOUT("Starting %d smp_call_function() threads", nthreads);
atomic_set(&n_started, nthreads);
for (i = 0; i < nthreads; i++) {

View File

@@ -570,7 +570,7 @@ int torture_shuffle_init(long shuffint)
shuffle_idle_cpu = -1;
if (!alloc_cpumask_var(&shuffle_tmp_mask, GFP_KERNEL)) {
VERBOSE_TOROUT_ERRSTRING("Failed to alloc mask");
TOROUT_ERRSTRING("Failed to alloc mask");
return -ENOMEM;
}
@@ -934,7 +934,7 @@ int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m,
*tp = kthread_run(fn, arg, "%s", s);
if (IS_ERR(*tp)) {
ret = PTR_ERR(*tp);
VERBOSE_TOROUT_ERRSTRING(f);
TOROUT_ERRSTRING(f);
*tp = NULL;
}
torture_shuffle_task_register(*tp);