Merge tag 'rcu.2022.01.09a' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu
Pull RCU updates from Paul McKenney:

 - Documentation updates, perhaps most notably Neil Brown's writeup of the reference-counting analogy to RCU.

 - Expedited grace-period cleanups.

 - Remove CONFIG_RCU_FAST_NO_HZ due to lack of valid users. I have asked around, posted a blog entry, and sent this series to LKML without result.

 - Miscellaneous fixes.

 - RCU callback offloading updates, perhaps most notably Frederic Weisbecker's updates allowing CPUs booted in the de-offloaded state to be offloaded at runtime.

 - nolibc fixes from Willy Tarreau and Ammar Faizi, but also including Mark Brown's addition of gettid().

 - RCU Tasks Trace fixes, including changes that increase the scalability of call_rcu_tasks_trace() for the BPF folks (Martin Lau and KP Singh).

 - Various fixes including those from Wander Lairson Costa and Li Zhijian.

 - Fixes plus addition of tests for the increased call_rcu_tasks_trace() scalability.

* tag 'rcu.2022.01.09a' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu: (87 commits)
  rcu/nocb: Merge rcu_spawn_cpu_nocb_kthread() and rcu_spawn_one_nocb_kthread()
  rcu/nocb: Allow empty "rcu_nocbs" kernel parameter
  rcu/nocb: Create kthreads on all CPUs if "rcu_nocbs=" or "nohz_full=" are passed
  rcu/nocb: Optimize kthreads and rdp initialization
  rcu/nocb: Prepare nocb_cb_wait() to start with a non-offloaded rdp
  rcu/nocb: Remove rcu_node structure from nocb list when de-offloaded
  rcu-tasks: Use fewer callbacks queues if callback flood ends
  rcu-tasks: Use separate ->percpu_dequeue_lim for callback dequeueing
  rcu-tasks: Use more callback queues if contention encountered
  rcu-tasks: Avoid raw-spinlocked wakeups from call_rcu_tasks_generic()
  rcu-tasks: Count trylocks to estimate call_rcu_tasks() contention
  rcu-tasks: Add rcupdate.rcu_task_enqueue_lim to set initial queueing
  rcu-tasks: Make rcu_barrier_tasks*() handle multiple callback queues
  rcu-tasks: Use workqueues for multiple rcu_tasks_invoke_cbs() invocations
  rcu-tasks: Abstract invocations of callbacks
  rcu-tasks: Abstract checking of callback lists
  rcu-tasks: Add a ->percpu_enqueue_lim to the rcu_tasks structure
  rcu-tasks: Inspect stalled task's trc state in locked state
  rcu-tasks: Use spin_lock_rcu_node() and friends
  rcutorture: Combine n_max_cbs from all kthreads in a callback flood
  ...
This commit is contained in:
@@ -1047,7 +1047,7 @@ static int __init lock_torture_init(void)
|
||||
sizeof(writer_tasks[0]),
|
||||
GFP_KERNEL);
|
||||
if (writer_tasks == NULL) {
|
||||
VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory");
|
||||
TOROUT_ERRSTRING("writer_tasks: Out of memory");
|
||||
firsterr = -ENOMEM;
|
||||
goto unwind;
|
||||
}
|
||||
@@ -1058,7 +1058,7 @@ static int __init lock_torture_init(void)
|
||||
sizeof(reader_tasks[0]),
|
||||
GFP_KERNEL);
|
||||
if (reader_tasks == NULL) {
|
||||
VERBOSE_TOROUT_ERRSTRING("reader_tasks: Out of memory");
|
||||
TOROUT_ERRSTRING("reader_tasks: Out of memory");
|
||||
kfree(writer_tasks);
|
||||
writer_tasks = NULL;
|
||||
firsterr = -ENOMEM;
|
||||
|
||||
@@ -112,7 +112,7 @@ config RCU_STALL_COMMON
|
||||
making these warnings mandatory for the tree variants.
|
||||
|
||||
config RCU_NEED_SEGCBLIST
|
||||
def_bool ( TREE_RCU || TREE_SRCU )
|
||||
def_bool ( TREE_RCU || TREE_SRCU || TASKS_RCU_GENERIC )
|
||||
|
||||
config RCU_FANOUT
|
||||
int "Tree-based hierarchical RCU fanout value"
|
||||
@@ -169,24 +169,6 @@ config RCU_FANOUT_LEAF
|
||||
|
||||
Take the default if unsure.
|
||||
|
||||
config RCU_FAST_NO_HZ
|
||||
bool "Accelerate last non-dyntick-idle CPU's grace periods"
|
||||
depends on NO_HZ_COMMON && SMP && RCU_EXPERT
|
||||
default n
|
||||
help
|
||||
This option permits CPUs to enter dynticks-idle state even if
|
||||
they have RCU callbacks queued, and prevents RCU from waking
|
||||
these CPUs up more than roughly once every four jiffies (by
|
||||
default, you can adjust this using the rcutree.rcu_idle_gp_delay
|
||||
parameter), thus improving energy efficiency. On the other
|
||||
hand, this option increases the duration of RCU grace periods,
|
||||
for example, slowing down synchronize_rcu().
|
||||
|
||||
Say Y if energy efficiency is critically important, and you
|
||||
don't care about increased grace-period durations.
|
||||
|
||||
Say N if you are unsure.
|
||||
|
||||
config RCU_BOOST
|
||||
bool "Enable RCU priority boosting"
|
||||
depends on (RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT) || PREEMPT_RT
|
||||
|
||||
@@ -261,16 +261,14 @@ void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark the specified rcu_segcblist structure as offloaded.
|
||||
* Mark the specified rcu_segcblist structure as offloaded (or not)
|
||||
*/
|
||||
void rcu_segcblist_offload(struct rcu_segcblist *rsclp, bool offload)
|
||||
{
|
||||
if (offload) {
|
||||
rcu_segcblist_clear_flags(rsclp, SEGCBLIST_SOFTIRQ_ONLY);
|
||||
rcu_segcblist_set_flags(rsclp, SEGCBLIST_OFFLOADED);
|
||||
} else {
|
||||
if (offload)
|
||||
rcu_segcblist_set_flags(rsclp, SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED);
|
||||
else
|
||||
rcu_segcblist_clear_flags(rsclp, SEGCBLIST_OFFLOADED);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -80,11 +80,14 @@ static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp)
|
||||
return rcu_segcblist_test_flags(rsclp, SEGCBLIST_ENABLED);
|
||||
}
|
||||
|
||||
/* Is the specified rcu_segcblist offloaded, or is SEGCBLIST_SOFTIRQ_ONLY set? */
|
||||
/*
|
||||
* Is the specified rcu_segcblist NOCB offloaded (or in the middle of the
|
||||
* [de]offloading process)?
|
||||
*/
|
||||
static inline bool rcu_segcblist_is_offloaded(struct rcu_segcblist *rsclp)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
|
||||
!rcu_segcblist_test_flags(rsclp, SEGCBLIST_SOFTIRQ_ONLY))
|
||||
rcu_segcblist_test_flags(rsclp, SEGCBLIST_LOCKING))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
@@ -92,9 +95,8 @@ static inline bool rcu_segcblist_is_offloaded(struct rcu_segcblist *rsclp)
|
||||
|
||||
static inline bool rcu_segcblist_completely_offloaded(struct rcu_segcblist *rsclp)
|
||||
{
|
||||
int flags = SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP | SEGCBLIST_OFFLOADED;
|
||||
|
||||
if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) && (rsclp->flags & flags) == flags)
|
||||
if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
|
||||
!rcu_segcblist_test_flags(rsclp, SEGCBLIST_RCU_CORE))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
|
||||
@@ -50,8 +50,8 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>");
|
||||
pr_alert("%s" SCALE_FLAG " %s\n", scale_type, s)
|
||||
#define VERBOSE_SCALEOUT_STRING(s) \
|
||||
do { if (verbose) pr_alert("%s" SCALE_FLAG " %s\n", scale_type, s); } while (0)
|
||||
#define VERBOSE_SCALEOUT_ERRSTRING(s) \
|
||||
do { if (verbose) pr_alert("%s" SCALE_FLAG "!!! %s\n", scale_type, s); } while (0)
|
||||
#define SCALEOUT_ERRSTRING(s) \
|
||||
pr_alert("%s" SCALE_FLAG "!!! %s\n", scale_type, s)
|
||||
|
||||
/*
|
||||
* The intended use cases for the nreaders and nwriters module parameters
|
||||
@@ -514,11 +514,11 @@ rcu_scale_cleanup(void)
|
||||
* during the mid-boot phase, so have to wait till the end.
|
||||
*/
|
||||
if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp)
|
||||
VERBOSE_SCALEOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!");
|
||||
SCALEOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!");
|
||||
if (rcu_gp_is_normal() && gp_exp)
|
||||
VERBOSE_SCALEOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!");
|
||||
SCALEOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!");
|
||||
if (gp_exp && gp_async)
|
||||
VERBOSE_SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!");
|
||||
SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!");
|
||||
|
||||
if (torture_cleanup_begin())
|
||||
return;
|
||||
@@ -845,7 +845,7 @@ rcu_scale_init(void)
|
||||
reader_tasks = kcalloc(nrealreaders, sizeof(reader_tasks[0]),
|
||||
GFP_KERNEL);
|
||||
if (reader_tasks == NULL) {
|
||||
VERBOSE_SCALEOUT_ERRSTRING("out of memory");
|
||||
SCALEOUT_ERRSTRING("out of memory");
|
||||
firsterr = -ENOMEM;
|
||||
goto unwind;
|
||||
}
|
||||
@@ -865,7 +865,7 @@ rcu_scale_init(void)
|
||||
kcalloc(nrealwriters, sizeof(*writer_n_durations),
|
||||
GFP_KERNEL);
|
||||
if (!writer_tasks || !writer_durations || !writer_n_durations) {
|
||||
VERBOSE_SCALEOUT_ERRSTRING("out of memory");
|
||||
SCALEOUT_ERRSTRING("out of memory");
|
||||
firsterr = -ENOMEM;
|
||||
goto unwind;
|
||||
}
|
||||
|
||||
@@ -46,6 +46,7 @@
|
||||
#include <linux/oom.h>
|
||||
#include <linux/tick.h>
|
||||
#include <linux/rcupdate_trace.h>
|
||||
#include <linux/nmi.h>
|
||||
|
||||
#include "rcu.h"
|
||||
|
||||
@@ -53,15 +54,18 @@ MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com> and Josh Triplett <josh@joshtriplett.org>");
|
||||
|
||||
/* Bits for ->extendables field, extendables param, and related definitions. */
|
||||
#define RCUTORTURE_RDR_SHIFT 8 /* Put SRCU index in upper bits. */
|
||||
#define RCUTORTURE_RDR_MASK ((1 << RCUTORTURE_RDR_SHIFT) - 1)
|
||||
#define RCUTORTURE_RDR_SHIFT_1 8 /* Put SRCU index in upper bits. */
|
||||
#define RCUTORTURE_RDR_MASK_1 (1 << RCUTORTURE_RDR_SHIFT_1)
|
||||
#define RCUTORTURE_RDR_SHIFT_2 9 /* Put SRCU index in upper bits. */
|
||||
#define RCUTORTURE_RDR_MASK_2 (1 << RCUTORTURE_RDR_SHIFT_2)
|
||||
#define RCUTORTURE_RDR_BH 0x01 /* Extend readers by disabling bh. */
|
||||
#define RCUTORTURE_RDR_IRQ 0x02 /* ... disabling interrupts. */
|
||||
#define RCUTORTURE_RDR_PREEMPT 0x04 /* ... disabling preemption. */
|
||||
#define RCUTORTURE_RDR_RBH 0x08 /* ... rcu_read_lock_bh(). */
|
||||
#define RCUTORTURE_RDR_SCHED 0x10 /* ... rcu_read_lock_sched(). */
|
||||
#define RCUTORTURE_RDR_RCU 0x20 /* ... entering another RCU reader. */
|
||||
#define RCUTORTURE_RDR_NBITS 6 /* Number of bits defined above. */
|
||||
#define RCUTORTURE_RDR_RCU_1 0x20 /* ... entering another RCU reader. */
|
||||
#define RCUTORTURE_RDR_RCU_2 0x40 /* ... entering another RCU reader. */
|
||||
#define RCUTORTURE_RDR_NBITS 7 /* Number of bits defined above. */
|
||||
#define RCUTORTURE_MAX_EXTEND \
|
||||
(RCUTORTURE_RDR_BH | RCUTORTURE_RDR_IRQ | RCUTORTURE_RDR_PREEMPT | \
|
||||
RCUTORTURE_RDR_RBH | RCUTORTURE_RDR_SCHED)
|
||||
@@ -75,7 +79,7 @@ torture_param(int, fqs_duration, 0,
|
||||
"Duration of fqs bursts (us), 0 to disable");
|
||||
torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)");
|
||||
torture_param(int, fqs_stutter, 3, "Wait time between fqs bursts (s)");
|
||||
torture_param(bool, fwd_progress, 1, "Test grace-period forward progress");
|
||||
torture_param(int, fwd_progress, 1, "Test grace-period forward progress");
|
||||
torture_param(int, fwd_progress_div, 4, "Fraction of CPU stall to wait");
|
||||
torture_param(int, fwd_progress_holdoff, 60,
|
||||
"Time between forward-progress tests (s)");
|
||||
@@ -109,6 +113,8 @@ torture_param(int, shutdown_secs, 0, "Shutdown time (s), <= zero to disable.");
|
||||
torture_param(int, stall_cpu, 0, "Stall duration (s), zero to disable.");
|
||||
torture_param(int, stall_cpu_holdoff, 10,
|
||||
"Time to wait before starting stall (s).");
|
||||
torture_param(bool, stall_no_softlockup, false,
|
||||
"Avoid softlockup warning during cpu stall.");
|
||||
torture_param(int, stall_cpu_irqsoff, 0, "Disable interrupts while stalling.");
|
||||
torture_param(int, stall_cpu_block, 0, "Sleep while stalling.");
|
||||
torture_param(int, stall_gp_kthread, 0,
|
||||
@@ -140,7 +146,7 @@ static struct task_struct *stats_task;
|
||||
static struct task_struct *fqs_task;
|
||||
static struct task_struct *boost_tasks[NR_CPUS];
|
||||
static struct task_struct *stall_task;
|
||||
static struct task_struct *fwd_prog_task;
|
||||
static struct task_struct **fwd_prog_tasks;
|
||||
static struct task_struct **barrier_cbs_tasks;
|
||||
static struct task_struct *barrier_task;
|
||||
static struct task_struct *read_exit_task;
|
||||
@@ -342,10 +348,12 @@ struct rcu_torture_ops {
|
||||
void (*gp_kthread_dbg)(void);
|
||||
bool (*check_boost_failed)(unsigned long gp_state, int *cpup);
|
||||
int (*stall_dur)(void);
|
||||
long cbflood_max;
|
||||
int irq_capable;
|
||||
int can_boost;
|
||||
int extendables;
|
||||
int slow_gps;
|
||||
int no_pi_lock;
|
||||
const char *name;
|
||||
};
|
||||
|
||||
@@ -667,6 +675,7 @@ static struct rcu_torture_ops srcu_ops = {
|
||||
.cb_barrier = srcu_torture_barrier,
|
||||
.stats = srcu_torture_stats,
|
||||
.irq_capable = 1,
|
||||
.no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU),
|
||||
.name = "srcu"
|
||||
};
|
||||
|
||||
@@ -700,6 +709,7 @@ static struct rcu_torture_ops srcud_ops = {
|
||||
.cb_barrier = srcu_torture_barrier,
|
||||
.stats = srcu_torture_stats,
|
||||
.irq_capable = 1,
|
||||
.no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU),
|
||||
.name = "srcud"
|
||||
};
|
||||
|
||||
@@ -720,6 +730,7 @@ static struct rcu_torture_ops busted_srcud_ops = {
|
||||
.cb_barrier = srcu_torture_barrier,
|
||||
.stats = srcu_torture_stats,
|
||||
.irq_capable = 1,
|
||||
.no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU),
|
||||
.extendables = RCUTORTURE_MAX_EXTEND,
|
||||
.name = "busted_srcud"
|
||||
};
|
||||
@@ -831,6 +842,7 @@ static struct rcu_torture_ops tasks_rude_ops = {
|
||||
.call = call_rcu_tasks_rude,
|
||||
.cb_barrier = rcu_barrier_tasks_rude,
|
||||
.gp_kthread_dbg = show_rcu_tasks_rude_gp_kthread,
|
||||
.cbflood_max = 50000,
|
||||
.fqs = NULL,
|
||||
.stats = NULL,
|
||||
.irq_capable = 1,
|
||||
@@ -871,6 +883,7 @@ static struct rcu_torture_ops tasks_tracing_ops = {
|
||||
.call = call_rcu_tasks_trace,
|
||||
.cb_barrier = rcu_barrier_tasks_trace,
|
||||
.gp_kthread_dbg = show_rcu_tasks_trace_gp_kthread,
|
||||
.cbflood_max = 50000,
|
||||
.fqs = NULL,
|
||||
.stats = NULL,
|
||||
.irq_capable = 1,
|
||||
@@ -1420,13 +1433,15 @@ static void rcutorture_one_extend(int *readstate, int newstate,
|
||||
struct rt_read_seg *rtrsp)
|
||||
{
|
||||
unsigned long flags;
|
||||
int idxnew = -1;
|
||||
int idxold = *readstate;
|
||||
int idxnew1 = -1;
|
||||
int idxnew2 = -1;
|
||||
int idxold1 = *readstate;
|
||||
int idxold2 = idxold1;
|
||||
int statesnew = ~*readstate & newstate;
|
||||
int statesold = *readstate & ~newstate;
|
||||
|
||||
WARN_ON_ONCE(idxold < 0);
|
||||
WARN_ON_ONCE((idxold >> RCUTORTURE_RDR_SHIFT) > 1);
|
||||
WARN_ON_ONCE(idxold2 < 0);
|
||||
WARN_ON_ONCE((idxold2 >> RCUTORTURE_RDR_SHIFT_2) > 1);
|
||||
rtrsp->rt_readstate = newstate;
|
||||
|
||||
/* First, put new protection in place to avoid critical-section gap. */
|
||||
@@ -1440,8 +1455,10 @@ static void rcutorture_one_extend(int *readstate, int newstate,
|
||||
preempt_disable();
|
||||
if (statesnew & RCUTORTURE_RDR_SCHED)
|
||||
rcu_read_lock_sched();
|
||||
if (statesnew & RCUTORTURE_RDR_RCU)
|
||||
idxnew = cur_ops->readlock() << RCUTORTURE_RDR_SHIFT;
|
||||
if (statesnew & RCUTORTURE_RDR_RCU_1)
|
||||
idxnew1 = (cur_ops->readlock() & 0x1) << RCUTORTURE_RDR_SHIFT_1;
|
||||
if (statesnew & RCUTORTURE_RDR_RCU_2)
|
||||
idxnew2 = (cur_ops->readlock() & 0x1) << RCUTORTURE_RDR_SHIFT_2;
|
||||
|
||||
/*
|
||||
* Next, remove old protection, in decreasing order of strength
|
||||
@@ -1460,12 +1477,20 @@ static void rcutorture_one_extend(int *readstate, int newstate,
|
||||
local_bh_enable();
|
||||
if (statesold & RCUTORTURE_RDR_RBH)
|
||||
rcu_read_unlock_bh();
|
||||
if (statesold & RCUTORTURE_RDR_RCU) {
|
||||
bool lockit = !statesnew && !(torture_random(trsp) & 0xffff);
|
||||
if (statesold & RCUTORTURE_RDR_RCU_2) {
|
||||
cur_ops->readunlock((idxold2 >> RCUTORTURE_RDR_SHIFT_2) & 0x1);
|
||||
WARN_ON_ONCE(idxnew2 != -1);
|
||||
idxold2 = 0;
|
||||
}
|
||||
if (statesold & RCUTORTURE_RDR_RCU_1) {
|
||||
bool lockit;
|
||||
|
||||
lockit = !cur_ops->no_pi_lock && !statesnew && !(torture_random(trsp) & 0xffff);
|
||||
if (lockit)
|
||||
raw_spin_lock_irqsave(¤t->pi_lock, flags);
|
||||
cur_ops->readunlock(idxold >> RCUTORTURE_RDR_SHIFT);
|
||||
cur_ops->readunlock((idxold1 >> RCUTORTURE_RDR_SHIFT_1) & 0x1);
|
||||
WARN_ON_ONCE(idxnew1 != -1);
|
||||
idxold1 = 0;
|
||||
if (lockit)
|
||||
raw_spin_unlock_irqrestore(¤t->pi_lock, flags);
|
||||
}
|
||||
@@ -1475,13 +1500,19 @@ static void rcutorture_one_extend(int *readstate, int newstate,
|
||||
cur_ops->read_delay(trsp, rtrsp);
|
||||
|
||||
/* Update the reader state. */
|
||||
if (idxnew == -1)
|
||||
idxnew = idxold & ~RCUTORTURE_RDR_MASK;
|
||||
WARN_ON_ONCE(idxnew < 0);
|
||||
WARN_ON_ONCE((idxnew >> RCUTORTURE_RDR_SHIFT) > 1);
|
||||
*readstate = idxnew | newstate;
|
||||
WARN_ON_ONCE((*readstate >> RCUTORTURE_RDR_SHIFT) < 0);
|
||||
WARN_ON_ONCE((*readstate >> RCUTORTURE_RDR_SHIFT) > 1);
|
||||
if (idxnew1 == -1)
|
||||
idxnew1 = idxold1 & RCUTORTURE_RDR_MASK_1;
|
||||
WARN_ON_ONCE(idxnew1 < 0);
|
||||
if (WARN_ON_ONCE((idxnew1 >> RCUTORTURE_RDR_SHIFT_1) > 1))
|
||||
pr_info("Unexpected idxnew1 value of %#x\n", idxnew1);
|
||||
if (idxnew2 == -1)
|
||||
idxnew2 = idxold2 & RCUTORTURE_RDR_MASK_2;
|
||||
WARN_ON_ONCE(idxnew2 < 0);
|
||||
WARN_ON_ONCE((idxnew2 >> RCUTORTURE_RDR_SHIFT_2) > 1);
|
||||
*readstate = idxnew1 | idxnew2 | newstate;
|
||||
WARN_ON_ONCE(*readstate < 0);
|
||||
if (WARN_ON_ONCE((*readstate >> RCUTORTURE_RDR_SHIFT_2) > 1))
|
||||
pr_info("Unexpected idxnew2 value of %#x\n", idxnew2);
|
||||
}
|
||||
|
||||
/* Return the biggest extendables mask given current RCU and boot parameters. */
|
||||
@@ -1491,7 +1522,7 @@ static int rcutorture_extend_mask_max(void)
|
||||
|
||||
WARN_ON_ONCE(extendables & ~RCUTORTURE_MAX_EXTEND);
|
||||
mask = extendables & RCUTORTURE_MAX_EXTEND & cur_ops->extendables;
|
||||
mask = mask | RCUTORTURE_RDR_RCU;
|
||||
mask = mask | RCUTORTURE_RDR_RCU_1 | RCUTORTURE_RDR_RCU_2;
|
||||
return mask;
|
||||
}
|
||||
|
||||
@@ -1506,13 +1537,21 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp)
|
||||
unsigned long preempts_irq = preempts | RCUTORTURE_RDR_IRQ;
|
||||
unsigned long bhs = RCUTORTURE_RDR_BH | RCUTORTURE_RDR_RBH;
|
||||
|
||||
WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT);
|
||||
WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT_1);
|
||||
/* Mostly only one bit (need preemption!), sometimes lots of bits. */
|
||||
if (!(randmask1 & 0x7))
|
||||
mask = mask & randmask2;
|
||||
else
|
||||
mask = mask & (1 << (randmask2 % RCUTORTURE_RDR_NBITS));
|
||||
|
||||
// Can't have nested RCU reader without outer RCU reader.
|
||||
if (!(mask & RCUTORTURE_RDR_RCU_1) && (mask & RCUTORTURE_RDR_RCU_2)) {
|
||||
if (oldmask & RCUTORTURE_RDR_RCU_1)
|
||||
mask &= ~RCUTORTURE_RDR_RCU_2;
|
||||
else
|
||||
mask |= RCUTORTURE_RDR_RCU_1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Can't enable bh w/irq disabled.
|
||||
*/
|
||||
@@ -1532,7 +1571,7 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp)
|
||||
mask |= oldmask & bhs;
|
||||
}
|
||||
|
||||
return mask ?: RCUTORTURE_RDR_RCU;
|
||||
return mask ?: RCUTORTURE_RDR_RCU_1;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1626,7 +1665,7 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid)
|
||||
rcu_torture_writer_state,
|
||||
cookie, cur_ops->get_gp_state());
|
||||
rcutorture_one_extend(&readstate, 0, trsp, rtrsp);
|
||||
WARN_ON_ONCE(readstate & RCUTORTURE_RDR_MASK);
|
||||
WARN_ON_ONCE(readstate);
|
||||
// This next splat is expected behavior if leakpointer, especially
|
||||
// for CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels.
|
||||
WARN_ON_ONCE(leakpointer && READ_ONCE(p->rtort_pipe_count) > 1);
|
||||
@@ -2052,6 +2091,8 @@ static int rcu_torture_stall(void *args)
|
||||
#else
|
||||
schedule_timeout_uninterruptible(HZ);
|
||||
#endif
|
||||
} else if (stall_no_softlockup) {
|
||||
touch_softlockup_watchdog();
|
||||
}
|
||||
if (stall_cpu_irqsoff)
|
||||
local_irq_enable();
|
||||
@@ -2123,10 +2164,13 @@ struct rcu_fwd {
|
||||
unsigned long rcu_fwd_startat;
|
||||
struct rcu_launder_hist n_launders_hist[N_LAUNDERS_HIST];
|
||||
unsigned long rcu_launder_gp_seq_start;
|
||||
int rcu_fwd_id;
|
||||
};
|
||||
|
||||
static DEFINE_MUTEX(rcu_fwd_mutex);
|
||||
static struct rcu_fwd *rcu_fwds;
|
||||
static unsigned long rcu_fwd_seq;
|
||||
static atomic_long_t rcu_fwd_max_cbs;
|
||||
static bool rcu_fwd_emergency_stop;
|
||||
|
||||
static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp)
|
||||
@@ -2139,8 +2183,9 @@ static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp)
|
||||
for (i = ARRAY_SIZE(rfp->n_launders_hist) - 1; i > 0; i--)
|
||||
if (rfp->n_launders_hist[i].n_launders > 0)
|
||||
break;
|
||||
pr_alert("%s: Callback-invocation histogram (duration %lu jiffies):",
|
||||
__func__, jiffies - rfp->rcu_fwd_startat);
|
||||
mutex_lock(&rcu_fwd_mutex); // Serialize histograms.
|
||||
pr_alert("%s: Callback-invocation histogram %d (duration %lu jiffies):",
|
||||
__func__, rfp->rcu_fwd_id, jiffies - rfp->rcu_fwd_startat);
|
||||
gps_old = rfp->rcu_launder_gp_seq_start;
|
||||
for (j = 0; j <= i; j++) {
|
||||
gps = rfp->n_launders_hist[j].launder_gp_seq;
|
||||
@@ -2151,6 +2196,7 @@ static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp)
|
||||
gps_old = gps;
|
||||
}
|
||||
pr_cont("\n");
|
||||
mutex_unlock(&rcu_fwd_mutex);
|
||||
}
|
||||
|
||||
/* Callback function for continuous-flood RCU callbacks. */
|
||||
@@ -2276,7 +2322,8 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp,
|
||||
cver = READ_ONCE(rcu_torture_current_version) - cver;
|
||||
gps = rcutorture_seq_diff(cur_ops->get_gp_seq(), gps);
|
||||
WARN_ON(!cver && gps < 2);
|
||||
pr_alert("%s: Duration %ld cver %ld gps %ld\n", __func__, dur, cver, gps);
|
||||
pr_alert("%s: %d Duration %ld cver %ld gps %ld\n", __func__,
|
||||
rfp->rcu_fwd_id, dur, cver, gps);
|
||||
}
|
||||
if (selfpropcb) {
|
||||
WRITE_ONCE(fcs.stop, 1);
|
||||
@@ -2344,7 +2391,7 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
|
||||
rfp->rcu_fwd_cb_head = rfcpn;
|
||||
n_launders++;
|
||||
n_launders_sa++;
|
||||
} else {
|
||||
} else if (!cur_ops->cbflood_max || cur_ops->cbflood_max > n_max_cbs) {
|
||||
rfcp = kmalloc(sizeof(*rfcp), GFP_KERNEL);
|
||||
if (WARN_ON_ONCE(!rfcp)) {
|
||||
schedule_timeout_interruptible(1);
|
||||
@@ -2354,8 +2401,11 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
|
||||
n_launders_sa = 0;
|
||||
rfcp->rfc_gps = 0;
|
||||
rfcp->rfc_rfp = rfp;
|
||||
} else {
|
||||
rfcp = NULL;
|
||||
}
|
||||
cur_ops->call(&rfcp->rh, rcu_torture_fwd_cb_cr);
|
||||
if (rfcp)
|
||||
cur_ops->call(&rfcp->rh, rcu_torture_fwd_cb_cr);
|
||||
rcu_torture_fwd_prog_cond_resched(n_launders + n_max_cbs);
|
||||
if (tick_nohz_full_enabled()) {
|
||||
local_irq_save(flags);
|
||||
@@ -2379,6 +2429,7 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
|
||||
n_launders + n_max_cbs - n_launders_cb_snap,
|
||||
n_launders, n_launders_sa,
|
||||
n_max_gps, n_max_cbs, cver, gps);
|
||||
atomic_long_add(n_max_cbs, &rcu_fwd_max_cbs);
|
||||
rcu_torture_fwd_cb_hist(rfp);
|
||||
}
|
||||
schedule_timeout_uninterruptible(HZ); /* Let CBs drain. */
|
||||
@@ -2394,6 +2445,8 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
|
||||
static int rcutorture_oom_notify(struct notifier_block *self,
|
||||
unsigned long notused, void *nfreed)
|
||||
{
|
||||
int i;
|
||||
long ncbs;
|
||||
struct rcu_fwd *rfp;
|
||||
|
||||
mutex_lock(&rcu_fwd_mutex);
|
||||
@@ -2404,18 +2457,26 @@ static int rcutorture_oom_notify(struct notifier_block *self,
|
||||
}
|
||||
WARN(1, "%s invoked upon OOM during forward-progress testing.\n",
|
||||
__func__);
|
||||
rcu_torture_fwd_cb_hist(rfp);
|
||||
rcu_fwd_progress_check(1 + (jiffies - READ_ONCE(rfp->rcu_fwd_startat)) / 2);
|
||||
for (i = 0; i < fwd_progress; i++) {
|
||||
rcu_torture_fwd_cb_hist(&rfp[i]);
|
||||
rcu_fwd_progress_check(1 + (jiffies - READ_ONCE(rfp[i].rcu_fwd_startat)) / 2);
|
||||
}
|
||||
WRITE_ONCE(rcu_fwd_emergency_stop, true);
|
||||
smp_mb(); /* Emergency stop before free and wait to avoid hangs. */
|
||||
pr_info("%s: Freed %lu RCU callbacks.\n",
|
||||
__func__, rcu_torture_fwd_prog_cbfree(rfp));
|
||||
ncbs = 0;
|
||||
for (i = 0; i < fwd_progress; i++)
|
||||
ncbs += rcu_torture_fwd_prog_cbfree(&rfp[i]);
|
||||
pr_info("%s: Freed %lu RCU callbacks.\n", __func__, ncbs);
|
||||
rcu_barrier();
|
||||
pr_info("%s: Freed %lu RCU callbacks.\n",
|
||||
__func__, rcu_torture_fwd_prog_cbfree(rfp));
|
||||
ncbs = 0;
|
||||
for (i = 0; i < fwd_progress; i++)
|
||||
ncbs += rcu_torture_fwd_prog_cbfree(&rfp[i]);
|
||||
pr_info("%s: Freed %lu RCU callbacks.\n", __func__, ncbs);
|
||||
rcu_barrier();
|
||||
pr_info("%s: Freed %lu RCU callbacks.\n",
|
||||
__func__, rcu_torture_fwd_prog_cbfree(rfp));
|
||||
ncbs = 0;
|
||||
for (i = 0; i < fwd_progress; i++)
|
||||
ncbs += rcu_torture_fwd_prog_cbfree(&rfp[i]);
|
||||
pr_info("%s: Freed %lu RCU callbacks.\n", __func__, ncbs);
|
||||
smp_mb(); /* Frees before return to avoid redoing OOM. */
|
||||
(*(unsigned long *)nfreed)++; /* Forward progress CBs freed! */
|
||||
pr_info("%s returning after OOM processing.\n", __func__);
|
||||
@@ -2430,7 +2491,10 @@ static struct notifier_block rcutorture_oom_nb = {
|
||||
/* Carry out grace-period forward-progress testing. */
|
||||
static int rcu_torture_fwd_prog(void *args)
|
||||
{
|
||||
bool firsttime = true;
|
||||
long max_cbs;
|
||||
int oldnice = task_nice(current);
|
||||
unsigned long oldseq = READ_ONCE(rcu_fwd_seq);
|
||||
struct rcu_fwd *rfp = args;
|
||||
int tested = 0;
|
||||
int tested_tries = 0;
|
||||
@@ -2440,21 +2504,38 @@ static int rcu_torture_fwd_prog(void *args)
|
||||
if (!IS_ENABLED(CONFIG_SMP) || !IS_ENABLED(CONFIG_RCU_BOOST))
|
||||
set_user_nice(current, MAX_NICE);
|
||||
do {
|
||||
schedule_timeout_interruptible(fwd_progress_holdoff * HZ);
|
||||
WRITE_ONCE(rcu_fwd_emergency_stop, false);
|
||||
if (!IS_ENABLED(CONFIG_TINY_RCU) ||
|
||||
rcu_inkernel_boot_has_ended())
|
||||
rcu_torture_fwd_prog_nr(rfp, &tested, &tested_tries);
|
||||
if (rcu_inkernel_boot_has_ended())
|
||||
if (!rfp->rcu_fwd_id) {
|
||||
schedule_timeout_interruptible(fwd_progress_holdoff * HZ);
|
||||
WRITE_ONCE(rcu_fwd_emergency_stop, false);
|
||||
if (!firsttime) {
|
||||
max_cbs = atomic_long_xchg(&rcu_fwd_max_cbs, 0);
|
||||
pr_alert("%s n_max_cbs: %ld\n", __func__, max_cbs);
|
||||
}
|
||||
firsttime = false;
|
||||
WRITE_ONCE(rcu_fwd_seq, rcu_fwd_seq + 1);
|
||||
} else {
|
||||
while (READ_ONCE(rcu_fwd_seq) == oldseq)
|
||||
schedule_timeout_interruptible(1);
|
||||
oldseq = READ_ONCE(rcu_fwd_seq);
|
||||
}
|
||||
pr_alert("%s: Starting forward-progress test %d\n", __func__, rfp->rcu_fwd_id);
|
||||
if (rcu_inkernel_boot_has_ended() && torture_num_online_cpus() > rfp->rcu_fwd_id)
|
||||
rcu_torture_fwd_prog_cr(rfp);
|
||||
if ((cur_ops->stall_dur && cur_ops->stall_dur() > 0) &&
|
||||
(!IS_ENABLED(CONFIG_TINY_RCU) ||
|
||||
(rcu_inkernel_boot_has_ended() &&
|
||||
torture_num_online_cpus() > rfp->rcu_fwd_id)))
|
||||
rcu_torture_fwd_prog_nr(rfp, &tested, &tested_tries);
|
||||
|
||||
/* Avoid slow periods, better to test when busy. */
|
||||
if (stutter_wait("rcu_torture_fwd_prog"))
|
||||
sched_set_normal(current, oldnice);
|
||||
} while (!torture_must_stop());
|
||||
/* Short runs might not contain a valid forward-progress attempt. */
|
||||
WARN_ON(!tested && tested_tries >= 5);
|
||||
pr_alert("%s: tested %d tested_tries %d\n", __func__, tested, tested_tries);
|
||||
if (!rfp->rcu_fwd_id) {
|
||||
WARN_ON(!tested && tested_tries >= 5);
|
||||
pr_alert("%s: tested %d tested_tries %d\n", __func__, tested, tested_tries);
|
||||
}
|
||||
torture_kthread_stopping("rcu_torture_fwd_prog");
|
||||
return 0;
|
||||
}
|
||||
@@ -2462,17 +2543,28 @@ static int rcu_torture_fwd_prog(void *args)
|
||||
/* If forward-progress checking is requested and feasible, spawn the thread. */
|
||||
static int __init rcu_torture_fwd_prog_init(void)
|
||||
{
|
||||
int i;
|
||||
int ret = 0;
|
||||
struct rcu_fwd *rfp;
|
||||
|
||||
if (!fwd_progress)
|
||||
return 0; /* Not requested, so don't do it. */
|
||||
if (fwd_progress >= nr_cpu_ids) {
|
||||
VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Limiting fwd_progress to # CPUs.\n");
|
||||
fwd_progress = nr_cpu_ids;
|
||||
} else if (fwd_progress < 0) {
|
||||
fwd_progress = nr_cpu_ids;
|
||||
}
|
||||
if ((!cur_ops->sync && !cur_ops->call) ||
|
||||
!cur_ops->stall_dur || cur_ops->stall_dur() <= 0 || cur_ops == &rcu_busted_ops) {
|
||||
(!cur_ops->cbflood_max && (!cur_ops->stall_dur || cur_ops->stall_dur() <= 0)) ||
|
||||
cur_ops == &rcu_busted_ops) {
|
||||
VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Disabled, unsupported by RCU flavor under test");
|
||||
fwd_progress = 0;
|
||||
return 0;
|
||||
}
|
||||
if (stall_cpu > 0) {
|
||||
VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Disabled, conflicts with CPU-stall testing");
|
||||
fwd_progress = 0;
|
||||
if (IS_MODULE(CONFIG_RCU_TORTURE_TEST))
|
||||
return -EINVAL; /* In module, can fail back to user. */
|
||||
WARN_ON(1); /* Make sure rcutorture notices conflict. */
|
||||
@@ -2482,29 +2574,51 @@ static int __init rcu_torture_fwd_prog_init(void)
|
||||
fwd_progress_holdoff = 1;
|
||||
if (fwd_progress_div <= 0)
|
||||
fwd_progress_div = 4;
|
||||
rfp = kzalloc(sizeof(*rfp), GFP_KERNEL);
|
||||
if (!rfp)
|
||||
rfp = kcalloc(fwd_progress, sizeof(*rfp), GFP_KERNEL);
|
||||
fwd_prog_tasks = kcalloc(fwd_progress, sizeof(*fwd_prog_tasks), GFP_KERNEL);
|
||||
if (!rfp || !fwd_prog_tasks) {
|
||||
kfree(rfp);
|
||||
kfree(fwd_prog_tasks);
|
||||
fwd_prog_tasks = NULL;
|
||||
fwd_progress = 0;
|
||||
return -ENOMEM;
|
||||
spin_lock_init(&rfp->rcu_fwd_lock);
|
||||
rfp->rcu_fwd_cb_tail = &rfp->rcu_fwd_cb_head;
|
||||
}
|
||||
for (i = 0; i < fwd_progress; i++) {
|
||||
spin_lock_init(&rfp[i].rcu_fwd_lock);
|
||||
rfp[i].rcu_fwd_cb_tail = &rfp[i].rcu_fwd_cb_head;
|
||||
rfp[i].rcu_fwd_id = i;
|
||||
}
|
||||
mutex_lock(&rcu_fwd_mutex);
|
||||
rcu_fwds = rfp;
|
||||
mutex_unlock(&rcu_fwd_mutex);
|
||||
register_oom_notifier(&rcutorture_oom_nb);
|
||||
return torture_create_kthread(rcu_torture_fwd_prog, rfp, fwd_prog_task);
|
||||
for (i = 0; i < fwd_progress; i++) {
|
||||
ret = torture_create_kthread(rcu_torture_fwd_prog, &rcu_fwds[i], fwd_prog_tasks[i]);
|
||||
if (ret) {
|
||||
fwd_progress = i;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void rcu_torture_fwd_prog_cleanup(void)
|
||||
{
|
||||
int i;
|
||||
struct rcu_fwd *rfp;
|
||||
|
||||
torture_stop_kthread(rcu_torture_fwd_prog, fwd_prog_task);
|
||||
rfp = rcu_fwds;
|
||||
if (!rcu_fwds || !fwd_prog_tasks)
|
||||
return;
|
||||
for (i = 0; i < fwd_progress; i++)
|
||||
torture_stop_kthread(rcu_torture_fwd_prog, fwd_prog_tasks[i]);
|
||||
unregister_oom_notifier(&rcutorture_oom_nb);
|
||||
mutex_lock(&rcu_fwd_mutex);
|
||||
rfp = rcu_fwds;
|
||||
rcu_fwds = NULL;
|
||||
mutex_unlock(&rcu_fwd_mutex);
|
||||
unregister_oom_notifier(&rcutorture_oom_nb);
|
||||
kfree(rfp);
|
||||
kfree(fwd_prog_tasks);
|
||||
fwd_prog_tasks = NULL;
|
||||
}
|
||||
|
||||
/* Callback function for RCU barrier testing. */
|
||||
@@ -2741,7 +2855,7 @@ static int rcu_torture_read_exit(void *unused)
|
||||
&trs, "%s",
|
||||
"rcu_torture_read_exit_child");
|
||||
if (IS_ERR(tsp)) {
|
||||
VERBOSE_TOROUT_ERRSTRING("out of memory");
|
||||
TOROUT_ERRSTRING("out of memory");
|
||||
errexit = true;
|
||||
tsp = NULL;
|
||||
break;
|
||||
@@ -3068,7 +3182,7 @@ rcu_torture_init(void)
|
||||
sizeof(fakewriter_tasks[0]),
|
||||
GFP_KERNEL);
|
||||
if (fakewriter_tasks == NULL) {
|
||||
VERBOSE_TOROUT_ERRSTRING("out of memory");
|
||||
TOROUT_ERRSTRING("out of memory");
|
||||
firsterr = -ENOMEM;
|
||||
goto unwind;
|
||||
}
|
||||
@@ -3084,7 +3198,7 @@ rcu_torture_init(void)
|
||||
rcu_torture_reader_mbchk = kcalloc(nrealreaders, sizeof(*rcu_torture_reader_mbchk),
|
||||
GFP_KERNEL);
|
||||
if (!reader_tasks || !rcu_torture_reader_mbchk) {
|
||||
VERBOSE_TOROUT_ERRSTRING("out of memory");
|
||||
TOROUT_ERRSTRING("out of memory");
|
||||
firsterr = -ENOMEM;
|
||||
goto unwind;
|
||||
}
|
||||
@@ -3103,7 +3217,7 @@ rcu_torture_init(void)
|
||||
if (nrealnocbers > 0) {
|
||||
nocb_tasks = kcalloc(nrealnocbers, sizeof(nocb_tasks[0]), GFP_KERNEL);
|
||||
if (nocb_tasks == NULL) {
|
||||
VERBOSE_TOROUT_ERRSTRING("out of memory");
|
||||
TOROUT_ERRSTRING("out of memory");
|
||||
firsterr = -ENOMEM;
|
||||
goto unwind;
|
||||
}
|
||||
|
||||
@@ -44,7 +44,10 @@
|
||||
pr_alert("%s" SCALE_FLAG s, scale_type, ## x)
|
||||
|
||||
// Emit a newline-terminated refscale message, but only when "verbose" is set.
#define VERBOSE_SCALEOUT(s, x...) \
	do { \
		if (verbose) \
			pr_alert("%s" SCALE_FLAG s "\n", scale_type, ## x); \
	} while (0)
|
||||
|
||||
static atomic_t verbose_batch_ctr;
|
||||
|
||||
@@ -54,12 +57,11 @@ do { \
|
||||
(verbose_batched <= 0 || \
|
||||
!(atomic_inc_return(&verbose_batch_ctr) % verbose_batched))) { \
|
||||
schedule_timeout_uninterruptible(1); \
|
||||
pr_alert("%s" SCALE_FLAG s, scale_type, ## x); \
|
||||
pr_alert("%s" SCALE_FLAG s "\n", scale_type, ## x); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define VERBOSE_SCALEOUT_ERRSTRING(s, x...) \
|
||||
do { if (verbose) pr_alert("%s" SCALE_FLAG "!!! " s, scale_type, ## x); } while (0)
|
||||
#define SCALEOUT_ERRSTRING(s, x...) pr_alert("%s" SCALE_FLAG "!!! " s "\n", scale_type, ## x)
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Joel Fernandes (Google) <joel@joelfernandes.org>");
|
||||
@@ -604,7 +606,7 @@ static u64 process_durations(int n)
|
||||
char *buf;
|
||||
u64 sum = 0;
|
||||
|
||||
buf = kmalloc(128 + nreaders * 32, GFP_KERNEL);
|
||||
buf = kmalloc(800 + 64, GFP_KERNEL);
|
||||
if (!buf)
|
||||
return 0;
|
||||
buf[0] = 0;
|
||||
@@ -617,13 +619,15 @@ static u64 process_durations(int n)
|
||||
|
||||
if (i % 5 == 0)
|
||||
strcat(buf, "\n");
|
||||
if (strlen(buf) >= 800) {
|
||||
pr_alert("%s", buf);
|
||||
buf[0] = 0;
|
||||
}
|
||||
strcat(buf, buf1);
|
||||
|
||||
sum += rt->last_duration_ns;
|
||||
}
|
||||
strcat(buf, "\n");
|
||||
|
||||
SCALEOUT("%s\n", buf);
|
||||
pr_alert("%s\n", buf);
|
||||
|
||||
kfree(buf);
|
||||
return sum;
|
||||
@@ -637,7 +641,6 @@ static u64 process_durations(int n)
|
||||
// point all the timestamps are printed.
|
||||
static int main_func(void *arg)
|
||||
{
|
||||
bool errexit = false;
|
||||
int exp, r;
|
||||
char buf1[64];
|
||||
char *buf;
|
||||
@@ -648,10 +651,10 @@ static int main_func(void *arg)
|
||||
|
||||
VERBOSE_SCALEOUT("main_func task started");
|
||||
result_avg = kzalloc(nruns * sizeof(*result_avg), GFP_KERNEL);
|
||||
buf = kzalloc(64 + nruns * 32, GFP_KERNEL);
|
||||
buf = kzalloc(800 + 64, GFP_KERNEL);
|
||||
if (!result_avg || !buf) {
|
||||
VERBOSE_SCALEOUT_ERRSTRING("out of memory");
|
||||
errexit = true;
|
||||
SCALEOUT_ERRSTRING("out of memory");
|
||||
goto oom_exit;
|
||||
}
|
||||
if (holdoff)
|
||||
schedule_timeout_interruptible(holdoff * HZ);
|
||||
@@ -663,8 +666,6 @@ static int main_func(void *arg)
|
||||
|
||||
// Start exp readers up per experiment
|
||||
for (exp = 0; exp < nruns && !torture_must_stop(); exp++) {
|
||||
if (errexit)
|
||||
break;
|
||||
if (torture_must_stop())
|
||||
goto end;
|
||||
|
||||
@@ -698,26 +699,23 @@ static int main_func(void *arg)
|
||||
// Print the average of all experiments
|
||||
SCALEOUT("END OF TEST. Calculating average duration per loop (nanoseconds)...\n");
|
||||
|
||||
if (!errexit) {
|
||||
buf[0] = 0;
|
||||
strcat(buf, "\n");
|
||||
strcat(buf, "Runs\tTime(ns)\n");
|
||||
}
|
||||
|
||||
pr_alert("Runs\tTime(ns)\n");
|
||||
for (exp = 0; exp < nruns; exp++) {
|
||||
u64 avg;
|
||||
u32 rem;
|
||||
|
||||
if (errexit)
|
||||
break;
|
||||
avg = div_u64_rem(result_avg[exp], 1000, &rem);
|
||||
sprintf(buf1, "%d\t%llu.%03u\n", exp + 1, avg, rem);
|
||||
strcat(buf, buf1);
|
||||
if (strlen(buf) >= 800) {
|
||||
pr_alert("%s", buf);
|
||||
buf[0] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (!errexit)
|
||||
SCALEOUT("%s", buf);
|
||||
pr_alert("%s", buf);
|
||||
|
||||
oom_exit:
|
||||
// This will shutdown everything including us.
|
||||
if (shutdown) {
|
||||
shutdown_start = 1;
|
||||
@@ -841,12 +839,12 @@ ref_scale_init(void)
|
||||
reader_tasks = kcalloc(nreaders, sizeof(reader_tasks[0]),
|
||||
GFP_KERNEL);
|
||||
if (!reader_tasks) {
|
||||
VERBOSE_SCALEOUT_ERRSTRING("out of memory");
|
||||
SCALEOUT_ERRSTRING("out of memory");
|
||||
firsterr = -ENOMEM;
|
||||
goto unwind;
|
||||
}
|
||||
|
||||
VERBOSE_SCALEOUT("Starting %d reader threads\n", nreaders);
|
||||
VERBOSE_SCALEOUT("Starting %d reader threads", nreaders);
|
||||
|
||||
for (i = 0; i < nreaders; i++) {
|
||||
firsterr = torture_create_kthread(ref_scale_reader, (void *)i,
|
||||
|
||||
@@ -99,7 +99,7 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
|
||||
int newval = READ_ONCE(ssp->srcu_lock_nesting[idx]) - 1;
|
||||
|
||||
WRITE_ONCE(ssp->srcu_lock_nesting[idx], newval);
|
||||
if (!newval && READ_ONCE(ssp->srcu_gp_waiting))
|
||||
if (!newval && READ_ONCE(ssp->srcu_gp_waiting) && in_task())
|
||||
swake_up_one(&ssp->srcu_wq);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__srcu_read_unlock);
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_TASKS_RCU_GENERIC
|
||||
#include "rcu_segcblist.h"
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
@@ -19,12 +20,34 @@ typedef void (*postscan_func_t)(struct list_head *hop);
|
||||
typedef void (*holdouts_func_t)(struct list_head *hop, bool ndrpt, bool *frptp);
|
||||
typedef void (*postgp_func_t)(struct rcu_tasks *rtp);
|
||||
|
||||
/**
|
||||
* struct rcu_tasks_percpu - Per-CPU component of definition for a Tasks-RCU-like mechanism.
|
||||
* @cblist: Callback list.
|
||||
* @lock: Lock protecting per-CPU callback list.
|
||||
* @rtp_jiffies: Jiffies counter value for statistics.
|
||||
* @rtp_n_lock_retries: Rough lock-contention statistic.
|
||||
* @rtp_work: Work queue for invoking callbacks.
|
||||
* @rtp_irq_work: IRQ work queue for deferred wakeups.
|
||||
* @barrier_q_head: RCU callback for barrier operation.
|
||||
* @cpu: CPU number corresponding to this entry.
|
||||
* @rtpp: Pointer to the rcu_tasks structure.
|
||||
*/
|
||||
struct rcu_tasks_percpu {
|
||||
struct rcu_segcblist cblist;
|
||||
raw_spinlock_t __private lock;
|
||||
unsigned long rtp_jiffies;
|
||||
unsigned long rtp_n_lock_retries;
|
||||
struct work_struct rtp_work;
|
||||
struct irq_work rtp_irq_work;
|
||||
struct rcu_head barrier_q_head;
|
||||
int cpu;
|
||||
struct rcu_tasks *rtpp;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct rcu_tasks - Definition for a Tasks-RCU-like mechanism.
|
||||
* @cbs_head: Head of callback list.
|
||||
* @cbs_tail: Tail pointer for callback list.
|
||||
* @cbs_wq: Wait queue allowing new callback to get kthread's attention.
|
||||
* @cbs_lock: Lock protecting callback list.
|
||||
* @cbs_gbl_lock: Lock protecting callback list.
|
||||
* @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
|
||||
* @gp_func: This flavor's grace-period-wait function.
|
||||
* @gp_state: Grace period's most recent state transition (debugging).
|
||||
@@ -32,7 +55,7 @@ typedef void (*postgp_func_t)(struct rcu_tasks *rtp);
|
||||
* @init_fract: Initial backoff sleep interval.
|
||||
* @gp_jiffies: Time of last @gp_state transition.
|
||||
* @gp_start: Most recent grace-period start in jiffies.
|
||||
* @n_gps: Number of grace periods completed since boot.
|
||||
* @tasks_gp_seq: Number of grace periods completed since boot.
|
||||
* @n_ipis: Number of IPIs sent to encourage grace periods to end.
|
||||
* @n_ipis_fails: Number of IPI-send failures.
|
||||
* @pregp_func: This flavor's pre-grace-period function (optional).
|
||||
@@ -41,20 +64,27 @@ typedef void (*postgp_func_t)(struct rcu_tasks *rtp);
|
||||
* @holdouts_func: This flavor's holdout-list scan function (optional).
|
||||
* @postgp_func: This flavor's post-grace-period function (optional).
|
||||
* @call_func: This flavor's call_rcu()-equivalent function.
|
||||
* @rtpcpu: This flavor's rcu_tasks_percpu structure.
|
||||
* @percpu_enqueue_shift: Shift down CPU ID this much when enqueuing callbacks.
|
||||
* @percpu_enqueue_lim: Number of per-CPU callback queues in use for enqueuing.
|
||||
* @percpu_dequeue_lim: Number of per-CPU callback queues in use for dequeuing.
|
||||
* @percpu_dequeue_gpseq: RCU grace-period number to propagate enqueue limit to dequeuers.
|
||||
* @barrier_q_mutex: Serialize barrier operations.
|
||||
* @barrier_q_count: Number of queues being waited on.
|
||||
* @barrier_q_completion: Barrier wait/wakeup mechanism.
|
||||
* @barrier_q_seq: Sequence number for barrier operations.
|
||||
* @name: This flavor's textual name.
|
||||
* @kname: This flavor's kthread name.
|
||||
*/
|
||||
struct rcu_tasks {
|
||||
struct rcu_head *cbs_head;
|
||||
struct rcu_head **cbs_tail;
|
||||
struct wait_queue_head cbs_wq;
|
||||
raw_spinlock_t cbs_lock;
|
||||
raw_spinlock_t cbs_gbl_lock;
|
||||
int gp_state;
|
||||
int gp_sleep;
|
||||
int init_fract;
|
||||
unsigned long gp_jiffies;
|
||||
unsigned long gp_start;
|
||||
unsigned long n_gps;
|
||||
unsigned long tasks_gp_seq;
|
||||
unsigned long n_ipis;
|
||||
unsigned long n_ipis_fails;
|
||||
struct task_struct *kthread_ptr;
|
||||
@@ -65,20 +95,40 @@ struct rcu_tasks {
|
||||
holdouts_func_t holdouts_func;
|
||||
postgp_func_t postgp_func;
|
||||
call_rcu_func_t call_func;
|
||||
struct rcu_tasks_percpu __percpu *rtpcpu;
|
||||
int percpu_enqueue_shift;
|
||||
int percpu_enqueue_lim;
|
||||
int percpu_dequeue_lim;
|
||||
unsigned long percpu_dequeue_gpseq;
|
||||
struct mutex barrier_q_mutex;
|
||||
atomic_t barrier_q_count;
|
||||
struct completion barrier_q_completion;
|
||||
unsigned long barrier_q_seq;
|
||||
char *name;
|
||||
char *kname;
|
||||
};
|
||||
|
||||
static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp);

/*
 * Define one Tasks-RCU flavor named rt_name together with its per-CPU
 * callback-queue state (rt_name__percpu).  The flavor starts out with a
 * single enqueue/dequeue queue.  The initial CPU-to-queue shift is
 * order_base_2(CONFIG_NR_CPUS), which rounds up, so that every possible
 * CPU number shifts down to queue 0 even when CONFIG_NR_CPUS is not a
 * power of two.
 */
#define DEFINE_RCU_TASKS(rt_name, gp, call, n) \
static DEFINE_PER_CPU(struct rcu_tasks_percpu, rt_name ## __percpu) = { \
	.lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name ## __percpu.cbs_pcpu_lock), \
	.rtp_irq_work = IRQ_WORK_INIT(call_rcu_tasks_iw_wakeup), \
}; \
static struct rcu_tasks rt_name = \
{ \
	.cbs_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rt_name.cbs_wq), \
	.cbs_gbl_lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name.cbs_gbl_lock), \
	.gp_func = gp, \
	.call_func = call, \
	.rtpcpu = &rt_name ## __percpu, \
	.name = n, \
	.percpu_enqueue_shift = order_base_2(CONFIG_NR_CPUS), \
	.percpu_enqueue_lim = 1, \
	.percpu_dequeue_lim = 1, \
	.barrier_q_mutex = __MUTEX_INITIALIZER(rt_name.barrier_q_mutex), \
	.barrier_q_seq = (0UL - 50UL) << RCU_SEQ_CTR_SHIFT, \
	.kname = #rt_name, \
}
|
||||
|
||||
/* Track exiting tasks in order to allow them to be waited for. */
|
||||
@@ -94,6 +144,15 @@ module_param(rcu_task_ipi_delay, int, 0644);
|
||||
static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT;
|
||||
module_param(rcu_task_stall_timeout, int, 0644);
|
||||
|
||||
static int rcu_task_enqueue_lim __read_mostly = -1;
|
||||
module_param(rcu_task_enqueue_lim, int, 0444);
|
||||
|
||||
static bool rcu_task_cb_adjust;
|
||||
static int rcu_task_contend_lim __read_mostly = 100;
|
||||
module_param(rcu_task_contend_lim, int, 0444);
|
||||
static int rcu_task_collapse_lim __read_mostly = 10;
|
||||
module_param(rcu_task_collapse_lim, int, 0444);
|
||||
|
||||
/* RCU tasks grace-period state for debugging. */
|
||||
#define RTGS_INIT 0
|
||||
#define RTGS_WAIT_WAIT_CBS 1
|
||||
@@ -128,6 +187,8 @@ static const char * const rcu_tasks_gp_state_names[] = {
|
||||
//
|
||||
// Generic code.
|
||||
|
||||
static void rcu_tasks_invoke_cbs_wq(struct work_struct *wp);
|
||||
|
||||
/* Record grace-period phase and time. */
|
||||
static void set_tasks_gp_state(struct rcu_tasks *rtp, int newstate)
|
||||
{
|
||||
@@ -148,23 +209,106 @@ static const char *tasks_gp_state_getname(struct rcu_tasks *rtp)
|
||||
}
|
||||
#endif /* #ifndef CONFIG_TINY_RCU */
|
||||
|
||||
// Initialize per-CPU callback lists for the specified flavor of
|
||||
// Tasks RCU.
|
||||
static void cblist_init_generic(struct rcu_tasks *rtp)
|
||||
{
|
||||
int cpu;
|
||||
unsigned long flags;
|
||||
int lim;
|
||||
|
||||
raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
|
||||
if (rcu_task_enqueue_lim < 0) {
|
||||
rcu_task_enqueue_lim = 1;
|
||||
rcu_task_cb_adjust = true;
|
||||
pr_info("%s: Setting adjustable number of callback queues.\n", __func__);
|
||||
} else if (rcu_task_enqueue_lim == 0) {
|
||||
rcu_task_enqueue_lim = 1;
|
||||
}
|
||||
lim = rcu_task_enqueue_lim;
|
||||
|
||||
if (lim > nr_cpu_ids)
|
||||
lim = nr_cpu_ids;
|
||||
WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids / lim));
|
||||
WRITE_ONCE(rtp->percpu_dequeue_lim, lim);
|
||||
smp_store_release(&rtp->percpu_enqueue_lim, lim);
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
|
||||
|
||||
WARN_ON_ONCE(!rtpcp);
|
||||
if (cpu)
|
||||
raw_spin_lock_init(&ACCESS_PRIVATE(rtpcp, lock));
|
||||
raw_spin_lock_rcu_node(rtpcp); // irqs already disabled.
|
||||
if (rcu_segcblist_empty(&rtpcp->cblist))
|
||||
rcu_segcblist_init(&rtpcp->cblist);
|
||||
INIT_WORK(&rtpcp->rtp_work, rcu_tasks_invoke_cbs_wq);
|
||||
rtpcp->cpu = cpu;
|
||||
rtpcp->rtpp = rtp;
|
||||
raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled.
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
|
||||
pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim));
|
||||
}
|
||||
|
||||
// IRQ-work handler that does deferred wakeup for call_rcu_tasks_generic().
|
||||
static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp)
|
||||
{
|
||||
struct rcu_tasks *rtp;
|
||||
struct rcu_tasks_percpu *rtpcp = container_of(iwp, struct rcu_tasks_percpu, rtp_irq_work);
|
||||
|
||||
rtp = rtpcp->rtpp;
|
||||
wake_up(&rtp->cbs_wq);
|
||||
}
|
||||
|
||||
// Enqueue a callback for the specified flavor of Tasks RCU.
|
||||
static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
|
||||
struct rcu_tasks *rtp)
|
||||
{
|
||||
unsigned long flags;
|
||||
unsigned long j;
|
||||
bool needadjust = false;
|
||||
bool needwake;
|
||||
struct rcu_tasks_percpu *rtpcp;
|
||||
|
||||
rhp->next = NULL;
|
||||
rhp->func = func;
|
||||
raw_spin_lock_irqsave(&rtp->cbs_lock, flags);
|
||||
needwake = !rtp->cbs_head;
|
||||
WRITE_ONCE(*rtp->cbs_tail, rhp);
|
||||
rtp->cbs_tail = &rhp->next;
|
||||
raw_spin_unlock_irqrestore(&rtp->cbs_lock, flags);
|
||||
local_irq_save(flags);
|
||||
rcu_read_lock();
|
||||
rtpcp = per_cpu_ptr(rtp->rtpcpu,
|
||||
smp_processor_id() >> READ_ONCE(rtp->percpu_enqueue_shift));
|
||||
if (!raw_spin_trylock_rcu_node(rtpcp)) { // irqs already disabled.
|
||||
raw_spin_lock_rcu_node(rtpcp); // irqs already disabled.
|
||||
j = jiffies;
|
||||
if (rtpcp->rtp_jiffies != j) {
|
||||
rtpcp->rtp_jiffies = j;
|
||||
rtpcp->rtp_n_lock_retries = 0;
|
||||
}
|
||||
if (rcu_task_cb_adjust && ++rtpcp->rtp_n_lock_retries > rcu_task_contend_lim &&
|
||||
READ_ONCE(rtp->percpu_enqueue_lim) != nr_cpu_ids)
|
||||
needadjust = true; // Defer adjustment to avoid deadlock.
|
||||
}
|
||||
if (!rcu_segcblist_is_enabled(&rtpcp->cblist)) {
|
||||
raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled.
|
||||
cblist_init_generic(rtp);
|
||||
raw_spin_lock_rcu_node(rtpcp); // irqs already disabled.
|
||||
}
|
||||
needwake = rcu_segcblist_empty(&rtpcp->cblist);
|
||||
rcu_segcblist_enqueue(&rtpcp->cblist, rhp);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
|
||||
if (unlikely(needadjust)) {
|
||||
raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
|
||||
if (rtp->percpu_enqueue_lim != nr_cpu_ids) {
|
||||
WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids));
|
||||
WRITE_ONCE(rtp->percpu_dequeue_lim, nr_cpu_ids);
|
||||
smp_store_release(&rtp->percpu_enqueue_lim, nr_cpu_ids);
|
||||
pr_info("Switching %s to per-CPU callback queuing.\n", rtp->name);
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
/* We can't create the thread unless interrupts are enabled. */
|
||||
if (needwake && READ_ONCE(rtp->kthread_ptr))
|
||||
wake_up(&rtp->cbs_wq);
|
||||
irq_work_queue(&rtpcp->rtp_irq_work);
|
||||
}
|
||||
|
||||
// Wait for a grace period for the specified flavor of Tasks RCU.
|
||||
@@ -178,12 +322,173 @@ static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp)
|
||||
wait_rcu_gp(rtp->call_func);
|
||||
}
|
||||
|
||||
// RCU callback function for rcu_barrier_tasks_generic().
|
||||
static void rcu_barrier_tasks_generic_cb(struct rcu_head *rhp)
|
||||
{
|
||||
struct rcu_tasks *rtp;
|
||||
struct rcu_tasks_percpu *rtpcp;
|
||||
|
||||
rtpcp = container_of(rhp, struct rcu_tasks_percpu, barrier_q_head);
|
||||
rtp = rtpcp->rtpp;
|
||||
if (atomic_dec_and_test(&rtp->barrier_q_count))
|
||||
complete(&rtp->barrier_q_completion);
|
||||
}
|
||||
|
||||
// Wait for all in-flight callbacks for the specified RCU Tasks flavor.
|
||||
// Operates in a manner similar to rcu_barrier().
|
||||
static void rcu_barrier_tasks_generic(struct rcu_tasks *rtp)
|
||||
{
|
||||
int cpu;
|
||||
unsigned long flags;
|
||||
struct rcu_tasks_percpu *rtpcp;
|
||||
unsigned long s = rcu_seq_snap(&rtp->barrier_q_seq);
|
||||
|
||||
mutex_lock(&rtp->barrier_q_mutex);
|
||||
if (rcu_seq_done(&rtp->barrier_q_seq, s)) {
|
||||
smp_mb();
|
||||
mutex_unlock(&rtp->barrier_q_mutex);
|
||||
return;
|
||||
}
|
||||
rcu_seq_start(&rtp->barrier_q_seq);
|
||||
init_completion(&rtp->barrier_q_completion);
|
||||
atomic_set(&rtp->barrier_q_count, 2);
|
||||
for_each_possible_cpu(cpu) {
|
||||
if (cpu >= smp_load_acquire(&rtp->percpu_dequeue_lim))
|
||||
break;
|
||||
rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
|
||||
rtpcp->barrier_q_head.func = rcu_barrier_tasks_generic_cb;
|
||||
raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
|
||||
if (rcu_segcblist_entrain(&rtpcp->cblist, &rtpcp->barrier_q_head))
|
||||
atomic_inc(&rtp->barrier_q_count);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
|
||||
}
|
||||
if (atomic_sub_and_test(2, &rtp->barrier_q_count))
|
||||
complete(&rtp->barrier_q_completion);
|
||||
wait_for_completion(&rtp->barrier_q_completion);
|
||||
rcu_seq_end(&rtp->barrier_q_seq);
|
||||
mutex_unlock(&rtp->barrier_q_mutex);
|
||||
}
|
||||
|
||||
// Advance callbacks and indicate whether either a grace period or
|
||||
// callback invocation is needed.
|
||||
static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp)
|
||||
{
|
||||
int cpu;
|
||||
unsigned long flags;
|
||||
long n;
|
||||
long ncbs = 0;
|
||||
long ncbsnz = 0;
|
||||
int needgpcb = 0;
|
||||
|
||||
for (cpu = 0; cpu < smp_load_acquire(&rtp->percpu_dequeue_lim); cpu++) {
|
||||
struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
|
||||
|
||||
/* Advance and accelerate any new callbacks. */
|
||||
if (!rcu_segcblist_n_cbs(&rtpcp->cblist))
|
||||
continue;
|
||||
raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
|
||||
// Should we shrink down to a single callback queue?
|
||||
n = rcu_segcblist_n_cbs(&rtpcp->cblist);
|
||||
if (n) {
|
||||
ncbs += n;
|
||||
if (cpu > 0)
|
||||
ncbsnz += n;
|
||||
}
|
||||
rcu_segcblist_advance(&rtpcp->cblist, rcu_seq_current(&rtp->tasks_gp_seq));
|
||||
(void)rcu_segcblist_accelerate(&rtpcp->cblist, rcu_seq_snap(&rtp->tasks_gp_seq));
|
||||
if (rcu_segcblist_pend_cbs(&rtpcp->cblist))
|
||||
needgpcb |= 0x3;
|
||||
if (!rcu_segcblist_empty(&rtpcp->cblist))
|
||||
needgpcb |= 0x1;
|
||||
raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
|
||||
}
|
||||
|
||||
// Shrink down to a single callback queue if appropriate.
|
||||
// This is done in two stages: (1) If there are no more than
|
||||
// rcu_task_collapse_lim callbacks on CPU 0 and none on any other
|
||||
// CPU, limit enqueueing to CPU 0. (2) After an RCU grace period,
|
||||
// if there has not been an increase in callbacks, limit dequeuing
|
||||
// to CPU 0. Note the matching RCU read-side critical section in
|
||||
// call_rcu_tasks_generic().
|
||||
if (rcu_task_cb_adjust && ncbs <= rcu_task_collapse_lim) {
|
||||
raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
|
||||
if (rtp->percpu_enqueue_lim > 1) {
|
||||
WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids));
|
||||
smp_store_release(&rtp->percpu_enqueue_lim, 1);
|
||||
rtp->percpu_dequeue_gpseq = get_state_synchronize_rcu();
|
||||
pr_info("Starting switch %s to CPU-0 callback queuing.\n", rtp->name);
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
|
||||
}
|
||||
if (rcu_task_cb_adjust && !ncbsnz &&
|
||||
poll_state_synchronize_rcu(rtp->percpu_dequeue_gpseq)) {
|
||||
raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
|
||||
if (rtp->percpu_enqueue_lim < rtp->percpu_dequeue_lim) {
|
||||
WRITE_ONCE(rtp->percpu_dequeue_lim, 1);
|
||||
pr_info("Completing switch %s to CPU-0 callback queuing.\n", rtp->name);
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
|
||||
}
|
||||
|
||||
return needgpcb;
|
||||
}
|
||||
|
||||
// Advance callbacks and invoke any that are ready.
|
||||
static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu *rtpcp)
|
||||
{
|
||||
int cpu;
|
||||
int cpunext;
|
||||
unsigned long flags;
|
||||
int len;
|
||||
struct rcu_head *rhp;
|
||||
struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
|
||||
struct rcu_tasks_percpu *rtpcp_next;
|
||||
|
||||
cpu = rtpcp->cpu;
|
||||
cpunext = cpu * 2 + 1;
|
||||
if (cpunext < smp_load_acquire(&rtp->percpu_dequeue_lim)) {
|
||||
rtpcp_next = per_cpu_ptr(rtp->rtpcpu, cpunext);
|
||||
queue_work_on(cpunext, system_wq, &rtpcp_next->rtp_work);
|
||||
cpunext++;
|
||||
if (cpunext < smp_load_acquire(&rtp->percpu_dequeue_lim)) {
|
||||
rtpcp_next = per_cpu_ptr(rtp->rtpcpu, cpunext);
|
||||
queue_work_on(cpunext, system_wq, &rtpcp_next->rtp_work);
|
||||
}
|
||||
}
|
||||
|
||||
if (rcu_segcblist_empty(&rtpcp->cblist))
|
||||
return;
|
||||
raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
|
||||
rcu_segcblist_advance(&rtpcp->cblist, rcu_seq_current(&rtp->tasks_gp_seq));
|
||||
rcu_segcblist_extract_done_cbs(&rtpcp->cblist, &rcl);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
|
||||
len = rcl.len;
|
||||
for (rhp = rcu_cblist_dequeue(&rcl); rhp; rhp = rcu_cblist_dequeue(&rcl)) {
|
||||
local_bh_disable();
|
||||
rhp->func(rhp);
|
||||
local_bh_enable();
|
||||
cond_resched();
|
||||
}
|
||||
raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
|
||||
rcu_segcblist_add_len(&rtpcp->cblist, -len);
|
||||
(void)rcu_segcblist_accelerate(&rtpcp->cblist, rcu_seq_snap(&rtp->tasks_gp_seq));
|
||||
raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
|
||||
}
|
||||
|
||||
// Workqueue flood to advance callbacks and invoke any that are ready.
|
||||
static void rcu_tasks_invoke_cbs_wq(struct work_struct *wp)
|
||||
{
|
||||
struct rcu_tasks *rtp;
|
||||
struct rcu_tasks_percpu *rtpcp = container_of(wp, struct rcu_tasks_percpu, rtp_work);
|
||||
|
||||
rtp = rtpcp->rtpp;
|
||||
rcu_tasks_invoke_cbs(rtp, rtpcp);
|
||||
}
|
||||
|
||||
/* RCU-tasks kthread that detects grace periods and invokes callbacks. */
|
||||
static int __noreturn rcu_tasks_kthread(void *arg)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_head *list;
|
||||
struct rcu_head *next;
|
||||
int needgpcb;
|
||||
struct rcu_tasks *rtp = arg;
|
||||
|
||||
/* Run on housekeeping CPUs by default. Sysadm can move if desired. */
|
||||
@@ -199,42 +504,22 @@ static int __noreturn rcu_tasks_kthread(void *arg)
|
||||
for (;;) {
|
||||
set_tasks_gp_state(rtp, RTGS_WAIT_CBS);
|
||||
|
||||
/* Pick up any new callbacks. */
|
||||
raw_spin_lock_irqsave(&rtp->cbs_lock, flags);
|
||||
smp_mb__after_spinlock(); // Order updates vs. GP.
|
||||
list = rtp->cbs_head;
|
||||
rtp->cbs_head = NULL;
|
||||
rtp->cbs_tail = &rtp->cbs_head;
|
||||
raw_spin_unlock_irqrestore(&rtp->cbs_lock, flags);
|
||||
|
||||
/* If there were none, wait a bit and start over. */
|
||||
if (!list) {
|
||||
wait_event_interruptible(rtp->cbs_wq,
|
||||
READ_ONCE(rtp->cbs_head));
|
||||
if (!rtp->cbs_head) {
|
||||
WARN_ON(signal_pending(current));
|
||||
set_tasks_gp_state(rtp, RTGS_WAIT_WAIT_CBS);
|
||||
schedule_timeout_idle(HZ/10);
|
||||
}
|
||||
continue;
|
||||
wait_event_idle(rtp->cbs_wq, (needgpcb = rcu_tasks_need_gpcb(rtp)));
|
||||
|
||||
if (needgpcb & 0x2) {
|
||||
// Wait for one grace period.
|
||||
set_tasks_gp_state(rtp, RTGS_WAIT_GP);
|
||||
rtp->gp_start = jiffies;
|
||||
rcu_seq_start(&rtp->tasks_gp_seq);
|
||||
rtp->gp_func(rtp);
|
||||
rcu_seq_end(&rtp->tasks_gp_seq);
|
||||
}
|
||||
|
||||
// Wait for one grace period.
|
||||
set_tasks_gp_state(rtp, RTGS_WAIT_GP);
|
||||
rtp->gp_start = jiffies;
|
||||
rtp->gp_func(rtp);
|
||||
rtp->n_gps++;
|
||||
|
||||
/* Invoke the callbacks. */
|
||||
/* Invoke callbacks. */
|
||||
set_tasks_gp_state(rtp, RTGS_INVOKE_CBS);
|
||||
while (list) {
|
||||
next = list->next;
|
||||
local_bh_disable();
|
||||
list->func(list);
|
||||
local_bh_enable();
|
||||
list = next;
|
||||
cond_resched();
|
||||
}
|
||||
rcu_tasks_invoke_cbs(rtp, per_cpu_ptr(rtp->rtpcpu, 0));
|
||||
|
||||
/* Paranoid sleep to keep this from entering a tight loop */
|
||||
schedule_timeout_idle(rtp->gp_sleep);
|
||||
}
|
||||
@@ -279,14 +564,15 @@ static void __init rcu_tasks_bootup_oddness(void)
|
||||
/* Dump out rcutorture-relevant state common to all RCU-tasks flavors. */
|
||||
static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)
|
||||
{
|
||||
struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, 0); // for_each...
|
||||
pr_info("%s: %s(%d) since %lu g:%lu i:%lu/%lu %c%c %s\n",
|
||||
rtp->kname,
|
||||
tasks_gp_state_getname(rtp), data_race(rtp->gp_state),
|
||||
jiffies - data_race(rtp->gp_jiffies),
|
||||
data_race(rtp->n_gps),
|
||||
data_race(rcu_seq_current(&rtp->tasks_gp_seq)),
|
||||
data_race(rtp->n_ipis_fails), data_race(rtp->n_ipis),
|
||||
".k"[!!data_race(rtp->kthread_ptr)],
|
||||
".C"[!!data_race(rtp->cbs_head)],
|
||||
".C"[!data_race(rcu_segcblist_empty(&rtpcp->cblist))],
|
||||
s);
|
||||
}
|
||||
#endif // #ifndef CONFIG_TINY_RCU
|
||||
@@ -411,10 +697,10 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
|
||||
// exit_tasks_rcu_finish() functions begin and end, respectively, the SRCU
|
||||
// read-side critical sections waited for by rcu_tasks_postscan().
|
||||
//
|
||||
// Pre-grace-period update-side code is ordered before the grace via the
|
||||
// ->cbs_lock and the smp_mb__after_spinlock(). Pre-grace-period read-side
|
||||
// code is ordered before the grace period via synchronize_rcu() call
|
||||
// in rcu_tasks_pregp_step() and by the scheduler's locks and interrupt
|
||||
// Pre-grace-period update-side code is ordered before the grace period
|
||||
// via the raw_spin_lock.*rcu_node(). Pre-grace-period read-side code
|
||||
// is ordered before the grace period via synchronize_rcu() call in
|
||||
// rcu_tasks_pregp_step() and by the scheduler's locks and interrupt
|
||||
// disabling.
|
||||
|
||||
/* Pre-grace-period preparation. */
|
||||
@@ -586,13 +872,13 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_tasks);
|
||||
*/
|
||||
void rcu_barrier_tasks(void)
|
||||
{
|
||||
/* There is only one callback queue, so this is easy. ;-) */
|
||||
synchronize_rcu_tasks();
|
||||
rcu_barrier_tasks_generic(&rcu_tasks);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
|
||||
|
||||
static int __init rcu_spawn_tasks_kthread(void)
|
||||
{
|
||||
cblist_init_generic(&rcu_tasks);
|
||||
rcu_tasks.gp_sleep = HZ / 10;
|
||||
rcu_tasks.init_fract = HZ / 10;
|
||||
rcu_tasks.pregp_func = rcu_tasks_pregp_step;
|
||||
@@ -724,13 +1010,13 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_tasks_rude);
|
||||
*/
|
||||
void rcu_barrier_tasks_rude(void)
|
||||
{
|
||||
/* There is only one callback queue, so this is easy. ;-) */
|
||||
synchronize_rcu_tasks_rude();
|
||||
rcu_barrier_tasks_generic(&rcu_tasks_rude);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_barrier_tasks_rude);
|
||||
|
||||
static int __init rcu_spawn_tasks_rude_kthread(void)
|
||||
{
|
||||
cblist_init_generic(&rcu_tasks_rude);
|
||||
rcu_tasks_rude.gp_sleep = HZ / 10;
|
||||
rcu_spawn_tasks_kthread_generic(&rcu_tasks_rude);
|
||||
return 0;
|
||||
@@ -1073,25 +1359,50 @@ static void rcu_tasks_trace_postscan(struct list_head *hop)
|
||||
// Any tasks that exit after this point will set ->trc_reader_checked.
|
||||
}
|
||||
|
||||
/* Communicate task state back to the RCU tasks trace stall warning request. */
|
||||
struct trc_stall_chk_rdr {
|
||||
int nesting;
|
||||
int ipi_to_cpu;
|
||||
u8 needqs;
|
||||
};
|
||||
|
||||
static int trc_check_slow_task(struct task_struct *t, void *arg)
|
||||
{
|
||||
struct trc_stall_chk_rdr *trc_rdrp = arg;
|
||||
|
||||
if (task_curr(t))
|
||||
return false; // It is running, so decline to inspect it.
|
||||
trc_rdrp->nesting = READ_ONCE(t->trc_reader_nesting);
|
||||
trc_rdrp->ipi_to_cpu = READ_ONCE(t->trc_ipi_to_cpu);
|
||||
trc_rdrp->needqs = READ_ONCE(t->trc_reader_special.b.need_qs);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Show the state of a task stalling the current RCU tasks trace GP. */
|
||||
static void show_stalled_task_trace(struct task_struct *t, bool *firstreport)
|
||||
{
|
||||
int cpu;
|
||||
struct trc_stall_chk_rdr trc_rdr;
|
||||
bool is_idle_tsk = is_idle_task(t);
|
||||
|
||||
if (*firstreport) {
|
||||
pr_err("INFO: rcu_tasks_trace detected stalls on tasks:\n");
|
||||
*firstreport = false;
|
||||
}
|
||||
// FIXME: This should attempt to use try_invoke_on_nonrunning_task().
|
||||
cpu = task_cpu(t);
|
||||
pr_alert("P%d: %c%c%c nesting: %d%c cpu: %d\n",
|
||||
t->pid,
|
||||
".I"[READ_ONCE(t->trc_ipi_to_cpu) >= 0],
|
||||
".i"[is_idle_task(t)],
|
||||
".N"[cpu >= 0 && tick_nohz_full_cpu(cpu)],
|
||||
READ_ONCE(t->trc_reader_nesting),
|
||||
" N"[!!READ_ONCE(t->trc_reader_special.b.need_qs)],
|
||||
cpu);
|
||||
if (!task_call_func(t, trc_check_slow_task, &trc_rdr))
|
||||
pr_alert("P%d: %c\n",
|
||||
t->pid,
|
||||
".i"[is_idle_tsk]);
|
||||
else
|
||||
pr_alert("P%d: %c%c%c nesting: %d%c cpu: %d\n",
|
||||
t->pid,
|
||||
".I"[trc_rdr.ipi_to_cpu >= 0],
|
||||
".i"[is_idle_tsk],
|
||||
".N"[cpu >= 0 && tick_nohz_full_cpu(cpu)],
|
||||
trc_rdr.nesting,
|
||||
" N"[!!trc_rdr.needqs],
|
||||
cpu);
|
||||
sched_show_task(t);
|
||||
}
|
||||
|
||||
@@ -1121,7 +1432,8 @@ static void check_all_holdout_tasks_trace(struct list_head *hop,
|
||||
trc_wait_for_one_reader(t, hop);
|
||||
|
||||
// If check succeeded, remove this task from the list.
|
||||
if (READ_ONCE(t->trc_reader_checked))
|
||||
if (smp_load_acquire(&t->trc_ipi_to_cpu) == -1 &&
|
||||
READ_ONCE(t->trc_reader_checked))
|
||||
trc_del_holdout(t);
|
||||
else if (needreport)
|
||||
show_stalled_task_trace(t, firstreport);
|
||||
@@ -1156,7 +1468,7 @@ static void rcu_tasks_trace_postgp(struct rcu_tasks *rtp)
|
||||
// Yes, this assumes that CPUs process IPIs in order. If that ever
|
||||
// changes, there will need to be a recheck and/or timed wait.
|
||||
for_each_online_cpu(cpu)
|
||||
if (smp_load_acquire(per_cpu_ptr(&trc_ipi_to_cpu, cpu)))
|
||||
if (WARN_ON_ONCE(smp_load_acquire(per_cpu_ptr(&trc_ipi_to_cpu, cpu))))
|
||||
smp_call_function_single(cpu, rcu_tasks_trace_empty_fn, NULL, 1);
|
||||
|
||||
// Remove the safety count.
|
||||
@@ -1256,13 +1568,13 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_tasks_trace);
|
||||
*/
|
||||
void rcu_barrier_tasks_trace(void)
|
||||
{
|
||||
/* There is only one callback queue, so this is easy. ;-) */
|
||||
synchronize_rcu_tasks_trace();
|
||||
rcu_barrier_tasks_generic(&rcu_tasks_trace);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_barrier_tasks_trace);
|
||||
|
||||
static int __init rcu_spawn_tasks_trace_kthread(void)
|
||||
{
|
||||
cblist_init_generic(&rcu_tasks_trace);
|
||||
if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) {
|
||||
rcu_tasks_trace.gp_sleep = HZ / 10;
|
||||
rcu_tasks_trace.init_fract = HZ / 10;
|
||||
|
||||
@@ -79,7 +79,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
|
||||
.dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
|
||||
.dynticks = ATOMIC_INIT(1),
|
||||
#ifdef CONFIG_RCU_NOCB_CPU
|
||||
.cblist.flags = SEGCBLIST_SOFTIRQ_ONLY,
|
||||
.cblist.flags = SEGCBLIST_RCU_CORE,
|
||||
#endif
|
||||
};
|
||||
static struct rcu_state rcu_state = {
|
||||
@@ -624,7 +624,6 @@ static noinstr void rcu_eqs_enter(bool user)
|
||||
instrumentation_begin();
|
||||
trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, atomic_read(&rdp->dynticks));
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
|
||||
rcu_prepare_for_idle();
|
||||
rcu_preempt_deferred_qs(current);
|
||||
|
||||
// instrumentation for the noinstr rcu_dynticks_eqs_enter()
|
||||
@@ -768,9 +767,6 @@ noinstr void rcu_nmi_exit(void)
|
||||
trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, atomic_read(&rdp->dynticks));
|
||||
WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
|
||||
|
||||
if (!in_nmi())
|
||||
rcu_prepare_for_idle();
|
||||
|
||||
// instrumentation for the noinstr rcu_dynticks_eqs_enter()
|
||||
instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));
|
||||
instrumentation_end();
|
||||
@@ -872,7 +868,6 @@ static void noinstr rcu_eqs_exit(bool user)
|
||||
// instrumentation for the noinstr rcu_dynticks_eqs_exit()
|
||||
instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));
|
||||
|
||||
rcu_cleanup_after_idle();
|
||||
trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, atomic_read(&rdp->dynticks));
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
|
||||
WRITE_ONCE(rdp->dynticks_nesting, 1);
|
||||
@@ -1014,12 +1009,6 @@ noinstr void rcu_nmi_enter(void)
|
||||
rcu_dynticks_eqs_exit();
|
||||
// ... but is watching here.
|
||||
|
||||
if (!in_nmi()) {
|
||||
instrumentation_begin();
|
||||
rcu_cleanup_after_idle();
|
||||
instrumentation_end();
|
||||
}
|
||||
|
||||
instrumentation_begin();
|
||||
// instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs()
|
||||
instrument_atomic_read(&rdp->dynticks, sizeof(rdp->dynticks));
|
||||
@@ -1086,6 +1075,24 @@ void rcu_irq_enter_irqson(void)
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if any future non-offloaded RCU-related work will need
|
||||
* to be done by the current CPU, even if none need be done immediately,
|
||||
* returning 1 if so. This function is part of the RCU implementation;
|
||||
* it is -not- an exported member of the RCU API. This is used by
|
||||
* the idle-entry code to figure out whether it is safe to disable the
|
||||
* scheduler-clock interrupt.
|
||||
*
|
||||
* Just check whether or not this CPU has non-offloaded RCU callbacks
|
||||
* queued.
|
||||
*/
|
||||
int rcu_needs_cpu(u64 basemono, u64 *nextevt)
|
||||
{
|
||||
/*
 * basemono is not consulted here; *nextevt is unconditionally set
 * to KTIME_MAX.
 * NOTE(review): presumably KTIME_MAX tells the caller that RCU
 * imposes no timer deadline of its own -- confirm against the caller.
 */
*nextevt = KTIME_MAX;
|
||||
/*
 * Nonzero only when this CPU's ->cblist is non-empty and this CPU's
 * rcu_data is not offloaded.
 */
return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&
|
||||
!rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data));
|
||||
}
|
||||
|
||||
/*
|
||||
* If any sort of urgency was applied to the current CPU (for example,
|
||||
* the scheduler-clock interrupt was enabled on a nohz_full CPU) in order
|
||||
@@ -1467,7 +1474,7 @@ static void rcu_gp_kthread_wake(void)
|
||||
{
|
||||
struct task_struct *t = READ_ONCE(rcu_state.gp_kthread);
|
||||
|
||||
if ((current == t && !in_irq() && !in_serving_softirq()) ||
|
||||
if ((current == t && !in_hardirq() && !in_serving_softirq()) ||
|
||||
!READ_ONCE(rcu_state.gp_flags) || !t)
|
||||
return;
|
||||
WRITE_ONCE(rcu_state.gp_wake_time, jiffies);
|
||||
@@ -1590,10 +1597,11 @@ static void __maybe_unused rcu_advance_cbs_nowake(struct rcu_node *rnp,
|
||||
struct rcu_data *rdp)
|
||||
{
|
||||
rcu_lockdep_assert_cblist_protected(rdp);
|
||||
if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) ||
|
||||
!raw_spin_trylock_rcu_node(rnp))
|
||||
if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) || !raw_spin_trylock_rcu_node(rnp))
|
||||
return;
|
||||
WARN_ON_ONCE(rcu_advance_cbs(rnp, rdp));
|
||||
// The grace period cannot end while we hold the rcu_node lock.
|
||||
if (rcu_seq_state(rcu_seq_current(&rnp->gp_seq)))
|
||||
WARN_ON_ONCE(rcu_advance_cbs(rnp, rdp));
|
||||
raw_spin_unlock_rcu_node(rnp);
|
||||
}
|
||||
|
||||
@@ -2277,7 +2285,7 @@ rcu_report_qs_rdp(struct rcu_data *rdp)
|
||||
unsigned long flags;
|
||||
unsigned long mask;
|
||||
bool needwake = false;
|
||||
const bool offloaded = rcu_rdp_is_offloaded(rdp);
|
||||
bool needacc = false;
|
||||
struct rcu_node *rnp;
|
||||
|
||||
WARN_ON_ONCE(rdp->cpu != smp_processor_id());
|
||||
@@ -2304,15 +2312,30 @@ rcu_report_qs_rdp(struct rcu_data *rdp)
|
||||
/*
|
||||
* This GP can't end until cpu checks in, so all of our
|
||||
* callbacks can be processed during the next GP.
|
||||
*
|
||||
* NOCB kthreads have their own way to deal with that...
|
||||
*/
|
||||
if (!offloaded)
|
||||
if (!rcu_rdp_is_offloaded(rdp)) {
|
||||
needwake = rcu_accelerate_cbs(rnp, rdp);
|
||||
} else if (!rcu_segcblist_completely_offloaded(&rdp->cblist)) {
|
||||
/*
|
||||
* ...but NOCB kthreads may miss or delay callbacks acceleration
|
||||
* if in the middle of a (de-)offloading process.
|
||||
*/
|
||||
needacc = true;
|
||||
}
|
||||
|
||||
rcu_disable_urgency_upon_qs(rdp);
|
||||
rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
|
||||
/* ^^^ Released rnp->lock */
|
||||
if (needwake)
|
||||
rcu_gp_kthread_wake();
|
||||
|
||||
if (needacc) {
|
||||
rcu_nocb_lock_irqsave(rdp, flags);
|
||||
rcu_accelerate_cbs_unlocked(rnp, rdp);
|
||||
rcu_nocb_unlock_irqrestore(rdp, flags);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2444,7 +2467,6 @@ static void rcu_do_batch(struct rcu_data *rdp)
|
||||
int div;
|
||||
bool __maybe_unused empty;
|
||||
unsigned long flags;
|
||||
const bool offloaded = rcu_rdp_is_offloaded(rdp);
|
||||
struct rcu_head *rhp;
|
||||
struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
|
||||
long bl, count = 0;
|
||||
@@ -2462,18 +2484,17 @@ static void rcu_do_batch(struct rcu_data *rdp)
|
||||
}
|
||||
|
||||
/*
|
||||
* Extract the list of ready callbacks, disabling to prevent
|
||||
* Extract the list of ready callbacks, disabling IRQs to prevent
|
||||
* races with call_rcu() from interrupt handlers. Leave the
|
||||
* callback counts, as rcu_barrier() needs to be conservative.
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
rcu_nocb_lock(rdp);
|
||||
rcu_nocb_lock_irqsave(rdp, flags);
|
||||
WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
|
||||
pending = rcu_segcblist_n_cbs(&rdp->cblist);
|
||||
div = READ_ONCE(rcu_divisor);
|
||||
div = div < 0 ? 7 : div > sizeof(long) * 8 - 2 ? sizeof(long) * 8 - 2 : div;
|
||||
bl = max(rdp->blimit, pending >> div);
|
||||
if (unlikely(bl > 100)) {
|
||||
if (in_serving_softirq() && unlikely(bl > 100)) {
|
||||
long rrn = READ_ONCE(rcu_resched_ns);
|
||||
|
||||
rrn = rrn < NSEC_PER_MSEC ? NSEC_PER_MSEC : rrn > NSEC_PER_SEC ? NSEC_PER_SEC : rrn;
|
||||
@@ -2482,7 +2503,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
|
||||
trace_rcu_batch_start(rcu_state.name,
|
||||
rcu_segcblist_n_cbs(&rdp->cblist), bl);
|
||||
rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl);
|
||||
if (offloaded)
|
||||
if (rcu_rdp_is_offloaded(rdp))
|
||||
rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist);
|
||||
|
||||
trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbDequeued"));
|
||||
@@ -2510,18 +2531,21 @@ static void rcu_do_batch(struct rcu_data *rdp)
|
||||
/*
|
||||
* Stop only if limit reached and CPU has something to do.
|
||||
*/
|
||||
if (count >= bl && !offloaded &&
|
||||
(need_resched() ||
|
||||
(!is_idle_task(current) && !rcu_is_callbacks_kthread())))
|
||||
break;
|
||||
if (unlikely(tlimit)) {
|
||||
/* only call local_clock() every 32 callbacks */
|
||||
if (likely((count & 31) || local_clock() < tlimit))
|
||||
continue;
|
||||
/* Exceeded the time limit, so leave. */
|
||||
break;
|
||||
}
|
||||
if (!in_serving_softirq()) {
|
||||
if (in_serving_softirq()) {
|
||||
if (count >= bl && (need_resched() || !is_idle_task(current)))
|
||||
break;
|
||||
/*
|
||||
* Make sure we don't spend too much time here and deprive other
|
||||
* softirq vectors of CPU cycles.
|
||||
*/
|
||||
if (unlikely(tlimit)) {
|
||||
/* only call local_clock() every 32 callbacks */
|
||||
if (likely((count & 31) || local_clock() < tlimit))
|
||||
continue;
|
||||
/* Exceeded the time limit, so leave. */
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
local_bh_enable();
|
||||
lockdep_assert_irqs_enabled();
|
||||
cond_resched_tasks_rcu_qs();
|
||||
@@ -2530,8 +2554,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
|
||||
}
|
||||
}
|
||||
|
||||
local_irq_save(flags);
|
||||
rcu_nocb_lock(rdp);
|
||||
rcu_nocb_lock_irqsave(rdp, flags);
|
||||
rdp->n_cbs_invoked += count;
|
||||
trace_rcu_batch_end(rcu_state.name, count, !!rcl.head, need_resched(),
|
||||
is_idle_task(current), rcu_is_callbacks_kthread());
|
||||
@@ -2565,9 +2588,6 @@ static void rcu_do_batch(struct rcu_data *rdp)
|
||||
|
||||
rcu_nocb_unlock_irqrestore(rdp, flags);
|
||||
|
||||
/* Re-invoke RCU core processing if there are callbacks remaining. */
|
||||
if (!offloaded && rcu_segcblist_ready_cbs(&rdp->cblist))
|
||||
invoke_rcu_core();
|
||||
tick_dep_clear_task(current, TICK_DEP_BIT_RCU);
|
||||
}
|
||||
|
||||
@@ -2706,6 +2726,23 @@ static __latent_entropy void rcu_core(void)
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
|
||||
struct rcu_node *rnp = rdp->mynode;
|
||||
/*
|
||||
* On RT rcu_core() can be preempted when IRQs aren't disabled.
|
||||
* Therefore this function can race with concurrent NOCB (de-)offloading
|
||||
* on this CPU and the below condition must be considered volatile.
|
||||
* However if we race with:
|
||||
*
|
||||
* _ Offloading: In the worst case we accelerate or process callbacks
|
||||
* concurrently with NOCB kthreads. We are guaranteed to
|
||||
* call rcu_nocb_lock() if that happens.
|
||||
*
|
||||
* _ Deoffloading: In the worst case we miss callbacks acceleration or
|
||||
* processing. This is fine because the early stage
|
||||
* of deoffloading invokes rcu_core() after setting
|
||||
* SEGCBLIST_RCU_CORE. So we guarantee that we'll process
|
||||
* what could have been dismissed without the need to wait
|
||||
* for the next rcu_pending() check in the next jiffy.
|
||||
*/
|
||||
const bool do_batch = !rcu_segcblist_completely_offloaded(&rdp->cblist);
|
||||
|
||||
if (cpu_is_offline(smp_processor_id()))
|
||||
@@ -2714,7 +2751,7 @@ static __latent_entropy void rcu_core(void)
|
||||
WARN_ON_ONCE(!rdp->beenonline);
|
||||
|
||||
/* Report any deferred quiescent states if preemption enabled. */
|
||||
if (!(preempt_count() & PREEMPT_MASK)) {
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_COUNT) && (!(preempt_count() & PREEMPT_MASK))) {
|
||||
rcu_preempt_deferred_qs(current);
|
||||
} else if (rcu_preempt_need_deferred_qs(current)) {
|
||||
set_tsk_need_resched(current);
|
||||
@@ -2737,8 +2774,12 @@ static __latent_entropy void rcu_core(void)
|
||||
|
||||
/* If there are callbacks ready, invoke them. */
|
||||
if (do_batch && rcu_segcblist_ready_cbs(&rdp->cblist) &&
|
||||
likely(READ_ONCE(rcu_scheduler_fully_active)))
|
||||
likely(READ_ONCE(rcu_scheduler_fully_active))) {
|
||||
rcu_do_batch(rdp);
|
||||
/* Re-invoke RCU core processing if there are callbacks remaining. */
|
||||
if (rcu_segcblist_ready_cbs(&rdp->cblist))
|
||||
invoke_rcu_core();
|
||||
}
|
||||
|
||||
/* Do any needed deferred wakeups of rcuo kthreads. */
|
||||
do_nocb_deferred_wakeup(rdp);
|
||||
@@ -2982,7 +3023,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
|
||||
head->func = func;
|
||||
head->next = NULL;
|
||||
local_irq_save(flags);
|
||||
kasan_record_aux_stack(head);
|
||||
kasan_record_aux_stack_noalloc(head);
|
||||
rdp = this_cpu_ptr(&rcu_data);
|
||||
|
||||
/* Add the callback to our list. */
|
||||
@@ -3547,7 +3588,7 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
|
||||
return;
|
||||
}
|
||||
|
||||
kasan_record_aux_stack(ptr);
|
||||
kasan_record_aux_stack_noalloc(ptr);
|
||||
success = add_ptr_to_bulk_krc_lock(&krcp, &flags, ptr, !head);
|
||||
if (!success) {
|
||||
run_page_cache_worker(krcp);
|
||||
|
||||
@@ -157,7 +157,6 @@ struct rcu_data {
|
||||
bool core_needs_qs; /* Core waits for quiescent state. */
|
||||
bool beenonline; /* CPU online at least once. */
|
||||
bool gpwrap; /* Possible ->gp_seq wrap. */
|
||||
bool exp_deferred_qs; /* This CPU awaiting a deferred QS? */
|
||||
bool cpu_started; /* RCU watching this onlining CPU. */
|
||||
struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
|
||||
unsigned long grpmask; /* Mask to apply to leaf qsmask. */
|
||||
@@ -189,11 +188,6 @@ struct rcu_data {
|
||||
bool rcu_urgent_qs; /* GP old need light quiescent state. */
|
||||
bool rcu_forced_tick; /* Forced tick to provide QS. */
|
||||
bool rcu_forced_tick_exp; /* ... provide QS to expedited GP. */
|
||||
#ifdef CONFIG_RCU_FAST_NO_HZ
|
||||
unsigned long last_accelerate; /* Last jiffy CBs were accelerated. */
|
||||
unsigned long last_advance_all; /* Last jiffy CBs were all advanced. */
|
||||
int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
|
||||
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
|
||||
|
||||
/* 4) rcu_barrier(), OOM callbacks, and expediting. */
|
||||
struct rcu_head barrier_head;
|
||||
@@ -227,8 +221,11 @@ struct rcu_data {
|
||||
struct swait_queue_head nocb_gp_wq; /* For nocb kthreads to sleep on. */
|
||||
bool nocb_cb_sleep; /* Is the nocb CB thread asleep? */
|
||||
struct task_struct *nocb_cb_kthread;
|
||||
struct rcu_data *nocb_next_cb_rdp;
|
||||
/* Next rcu_data in wakeup chain. */
|
||||
struct list_head nocb_head_rdp; /*
|
||||
* Head of rcu_data list in wakeup chain,
|
||||
* if rdp_gp.
|
||||
*/
|
||||
struct list_head nocb_entry_rdp; /* rcu_data node in wakeup chain. */
|
||||
|
||||
/* The following fields are used by CB kthread, hence new cacheline. */
|
||||
struct rcu_data *nocb_gp_rdp ____cacheline_internodealigned_in_smp;
|
||||
@@ -419,8 +416,6 @@ static bool rcu_is_callbacks_kthread(void);
|
||||
static void rcu_cpu_kthread_setup(unsigned int cpu);
|
||||
static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp);
|
||||
static void __init rcu_spawn_boost_kthreads(void);
|
||||
static void rcu_cleanup_after_idle(void);
|
||||
static void rcu_prepare_for_idle(void);
|
||||
static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
|
||||
static bool rcu_preempt_need_deferred_qs(struct task_struct *t);
|
||||
static void rcu_preempt_deferred_qs(struct task_struct *t);
|
||||
@@ -447,12 +442,16 @@ static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
|
||||
static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp);
|
||||
#ifdef CONFIG_RCU_NOCB_CPU
|
||||
static void __init rcu_organize_nocb_kthreads(void);
|
||||
#define rcu_nocb_lock_irqsave(rdp, flags) \
|
||||
do { \
|
||||
if (!rcu_segcblist_is_offloaded(&(rdp)->cblist)) \
|
||||
local_irq_save(flags); \
|
||||
else \
|
||||
raw_spin_lock_irqsave(&(rdp)->nocb_lock, (flags)); \
|
||||
|
||||
/*
|
||||
* Disable IRQs before checking offloaded state so that local
|
||||
* locking is safe against concurrent de-offloading.
|
||||
*/
|
||||
#define rcu_nocb_lock_irqsave(rdp, flags) \
|
||||
do { \
|
||||
local_irq_save(flags); \
|
||||
if (rcu_segcblist_is_offloaded(&(rdp)->cblist)) \
|
||||
raw_spin_lock(&(rdp)->nocb_lock); \
|
||||
} while (0)
|
||||
#else /* #ifdef CONFIG_RCU_NOCB_CPU */
|
||||
#define rcu_nocb_lock_irqsave(rdp, flags) local_irq_save(flags)
|
||||
|
||||
@@ -255,7 +255,7 @@ static void rcu_report_exp_cpu_mult(struct rcu_node *rnp,
|
||||
*/
|
||||
static void rcu_report_exp_rdp(struct rcu_data *rdp)
|
||||
{
|
||||
WRITE_ONCE(rdp->exp_deferred_qs, false);
|
||||
WRITE_ONCE(rdp->cpu_no_qs.b.exp, false);
|
||||
rcu_report_exp_cpu_mult(rdp->mynode, rdp->grpmask, true);
|
||||
}
|
||||
|
||||
@@ -387,6 +387,7 @@ retry_ipi:
|
||||
continue;
|
||||
}
|
||||
if (get_cpu() == cpu) {
|
||||
mask_ofl_test |= mask;
|
||||
put_cpu();
|
||||
continue;
|
||||
}
|
||||
@@ -506,7 +507,10 @@ static void synchronize_rcu_expedited_wait(void)
|
||||
if (rdp->rcu_forced_tick_exp)
|
||||
continue;
|
||||
rdp->rcu_forced_tick_exp = true;
|
||||
tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
|
||||
preempt_disable();
|
||||
if (cpu_online(cpu))
|
||||
tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
|
||||
preempt_enable();
|
||||
}
|
||||
}
|
||||
j = READ_ONCE(jiffies_till_first_fqs);
|
||||
@@ -655,7 +659,7 @@ static void rcu_exp_handler(void *unused)
|
||||
rcu_dynticks_curr_cpu_in_eqs()) {
|
||||
rcu_report_exp_rdp(rdp);
|
||||
} else {
|
||||
rdp->exp_deferred_qs = true;
|
||||
WRITE_ONCE(rdp->cpu_no_qs.b.exp, true);
|
||||
set_tsk_need_resched(t);
|
||||
set_preempt_need_resched();
|
||||
}
|
||||
@@ -677,7 +681,7 @@ static void rcu_exp_handler(void *unused)
|
||||
if (depth > 0) {
|
||||
raw_spin_lock_irqsave_rcu_node(rnp, flags);
|
||||
if (rnp->expmask & rdp->grpmask) {
|
||||
rdp->exp_deferred_qs = true;
|
||||
WRITE_ONCE(rdp->cpu_no_qs.b.exp, true);
|
||||
t->rcu_read_unlock_special.b.exp_hint = true;
|
||||
}
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
@@ -759,7 +763,7 @@ static void sync_sched_exp_online_cleanup(int cpu)
|
||||
my_cpu = get_cpu();
|
||||
/* Quiescent state either not needed or already requested, leave. */
|
||||
if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
|
||||
rdp->cpu_no_qs.b.exp) {
|
||||
READ_ONCE(rdp->cpu_no_qs.b.exp)) {
|
||||
put_cpu();
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -60,16 +60,22 @@ static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp)
|
||||
* Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters.
|
||||
* If the list is invalid, a warning is emitted and all CPUs are offloaded.
|
||||
*/
|
||||
|
||||
static bool rcu_nocb_is_setup;
|
||||
|
||||
static int __init rcu_nocb_setup(char *str)
|
||||
{
|
||||
alloc_bootmem_cpumask_var(&rcu_nocb_mask);
|
||||
if (cpulist_parse(str, rcu_nocb_mask)) {
|
||||
pr_warn("rcu_nocbs= bad CPU range, all CPUs set\n");
|
||||
cpumask_setall(rcu_nocb_mask);
|
||||
if (*str == '=') {
|
||||
if (cpulist_parse(++str, rcu_nocb_mask)) {
|
||||
pr_warn("rcu_nocbs= bad CPU range, all CPUs set\n");
|
||||
cpumask_setall(rcu_nocb_mask);
|
||||
}
|
||||
}
|
||||
rcu_nocb_is_setup = true;
|
||||
return 1;
|
||||
}
|
||||
__setup("rcu_nocbs=", rcu_nocb_setup);
|
||||
__setup("rcu_nocbs", rcu_nocb_setup);
|
||||
|
||||
static int __init parse_rcu_nocb_poll(char *arg)
|
||||
{
|
||||
@@ -625,7 +631,21 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
|
||||
* and the global grace-period kthread are awakened if needed.
|
||||
*/
|
||||
WARN_ON_ONCE(my_rdp->nocb_gp_rdp != my_rdp);
|
||||
for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_cb_rdp) {
|
||||
/*
|
||||
* An rcu_data structure is removed from the list after its
|
||||
* CPU is de-offloaded and added to the list before that CPU is
|
||||
* (re-)offloaded. If the following loop happens to be referencing
|
||||
* that rcu_data structure during the time that the corresponding
|
||||
* CPU is de-offloaded and then immediately re-offloaded, this
|
||||
* loop's rdp pointer will be carried to the end of the list by
|
||||
* the resulting pair of list operations. This can cause the loop
|
||||
* to skip over some of the rcu_data structures that were supposed
|
||||
* to have been scanned. Fortunately a new iteration through the
|
||||
* entire loop is forced after a given CPU's rcu_data structure
|
||||
* is added to the list, so the skipped-over rcu_data structures
|
||||
* won't be ignored for long.
|
||||
*/
|
||||
list_for_each_entry_rcu(rdp, &my_rdp->nocb_head_rdp, nocb_entry_rdp, 1) {
|
||||
bool needwake_state = false;
|
||||
|
||||
if (!nocb_gp_enabled_cb(rdp))
|
||||
@@ -789,6 +809,18 @@ static void nocb_cb_wait(struct rcu_data *rdp)
|
||||
bool can_sleep = true;
|
||||
struct rcu_node *rnp = rdp->mynode;
|
||||
|
||||
do {
|
||||
swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
|
||||
nocb_cb_wait_cond(rdp));
|
||||
|
||||
// VVV Ensure CB invocation follows _sleep test.
|
||||
if (smp_load_acquire(&rdp->nocb_cb_sleep)) { // ^^^
|
||||
WARN_ON(signal_pending(current));
|
||||
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
|
||||
}
|
||||
} while (!nocb_cb_can_run(rdp));
|
||||
|
||||
|
||||
local_irq_save(flags);
|
||||
rcu_momentary_dyntick_idle();
|
||||
local_irq_restore(flags);
|
||||
@@ -841,17 +873,6 @@ static void nocb_cb_wait(struct rcu_data *rdp)
|
||||
|
||||
if (needwake_state)
|
||||
swake_up_one(&rdp->nocb_state_wq);
|
||||
|
||||
do {
|
||||
swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
|
||||
nocb_cb_wait_cond(rdp));
|
||||
|
||||
// VVV Ensure CB invocation follows _sleep test.
|
||||
if (smp_load_acquire(&rdp->nocb_cb_sleep)) { // ^^^
|
||||
WARN_ON(signal_pending(current));
|
||||
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
|
||||
}
|
||||
} while (!nocb_cb_can_run(rdp));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -990,22 +1011,33 @@ static long rcu_nocb_rdp_deoffload(void *arg)
|
||||
* will refuse to put anything into the bypass.
|
||||
*/
|
||||
WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));
|
||||
/*
|
||||
* Start with invoking rcu_core() early. This way if the current thread
|
||||
* happens to preempt an ongoing call to rcu_core() in the middle,
|
||||
* leaving some work dismissed because rcu_core() still thinks the rdp is
|
||||
* completely offloaded, we are guaranteed a nearby future instance of
|
||||
* rcu_core() to catch up.
|
||||
*/
|
||||
rcu_segcblist_set_flags(cblist, SEGCBLIST_RCU_CORE);
|
||||
invoke_rcu_core();
|
||||
ret = rdp_offload_toggle(rdp, false, flags);
|
||||
swait_event_exclusive(rdp->nocb_state_wq,
|
||||
!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB |
|
||||
SEGCBLIST_KTHREAD_GP));
|
||||
/* Stop nocb_gp_wait() from iterating over this structure. */
|
||||
list_del_rcu(&rdp->nocb_entry_rdp);
|
||||
/*
|
||||
* Lock one last time to acquire latest callback updates from kthreads
|
||||
* so we can later handle callbacks locally without locking.
|
||||
*/
|
||||
rcu_nocb_lock_irqsave(rdp, flags);
|
||||
/*
|
||||
* Theoretically we could set SEGCBLIST_SOFTIRQ_ONLY after the nocb
|
||||
* Theoretically we could clear SEGCBLIST_LOCKING after the nocb
|
||||
* lock is released but how about being paranoid for once?
|
||||
*/
|
||||
rcu_segcblist_set_flags(cblist, SEGCBLIST_SOFTIRQ_ONLY);
|
||||
rcu_segcblist_clear_flags(cblist, SEGCBLIST_LOCKING);
|
||||
/*
|
||||
* With SEGCBLIST_SOFTIRQ_ONLY, we can't use
|
||||
* Without SEGCBLIST_LOCKING, we can't use
|
||||
* rcu_nocb_unlock_irqrestore() anymore.
|
||||
*/
|
||||
raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
|
||||
@@ -1057,15 +1089,26 @@ static long rcu_nocb_rdp_offload(void *arg)
|
||||
return -EINVAL;
|
||||
|
||||
pr_info("Offloading %d\n", rdp->cpu);
|
||||
|
||||
/*
|
||||
* Can't use rcu_nocb_lock_irqsave() while we are in
|
||||
* SEGCBLIST_SOFTIRQ_ONLY mode.
|
||||
* Cause future nocb_gp_wait() invocations to iterate over
|
||||
* structure, resetting ->nocb_gp_sleep and waking up the related
|
||||
* "rcuog". Since nocb_gp_wait() in turn locks ->nocb_gp_lock
|
||||
* before setting ->nocb_gp_sleep again, we are guaranteed to
|
||||
* iterate this newly added structure before "rcuog" goes to
|
||||
* sleep again.
|
||||
*/
|
||||
list_add_tail_rcu(&rdp->nocb_entry_rdp, &rdp->nocb_gp_rdp->nocb_head_rdp);
|
||||
|
||||
/*
|
||||
* Can't use rcu_nocb_lock_irqsave() before SEGCBLIST_LOCKING
|
||||
* is set.
|
||||
*/
|
||||
raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
|
||||
|
||||
/*
|
||||
* We didn't take the nocb lock while working on the
|
||||
* rdp->cblist in SEGCBLIST_SOFTIRQ_ONLY mode.
|
||||
* rdp->cblist with SEGCBLIST_LOCKING cleared (pure softirq/rcuc mode).
|
||||
* All modifications that have been done previously on
|
||||
* rdp->cblist must be visible remotely by the nocb kthreads
|
||||
* upon wake up after reading the cblist flags.
|
||||
@@ -1084,6 +1127,14 @@ static long rcu_nocb_rdp_offload(void *arg)
|
||||
rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB) &&
|
||||
rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP));
|
||||
|
||||
/*
|
||||
* All kthreads are ready to work, we can finally relieve rcu_core() and
|
||||
* enable nocb bypass.
|
||||
*/
|
||||
rcu_nocb_lock_irqsave(rdp, flags);
|
||||
rcu_segcblist_clear_flags(cblist, SEGCBLIST_RCU_CORE);
|
||||
rcu_nocb_unlock_irqrestore(rdp, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1122,13 +1173,17 @@ void __init rcu_init_nohz(void)
|
||||
need_rcu_nocb_mask = true;
|
||||
#endif /* #if defined(CONFIG_NO_HZ_FULL) */
|
||||
|
||||
if (!cpumask_available(rcu_nocb_mask) && need_rcu_nocb_mask) {
|
||||
if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) {
|
||||
pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n");
|
||||
return;
|
||||
if (need_rcu_nocb_mask) {
|
||||
if (!cpumask_available(rcu_nocb_mask)) {
|
||||
if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) {
|
||||
pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
rcu_nocb_is_setup = true;
|
||||
}
|
||||
if (!cpumask_available(rcu_nocb_mask))
|
||||
|
||||
if (!rcu_nocb_is_setup)
|
||||
return;
|
||||
|
||||
#if defined(CONFIG_NO_HZ_FULL)
|
||||
@@ -1154,8 +1209,8 @@ void __init rcu_init_nohz(void)
|
||||
if (rcu_segcblist_empty(&rdp->cblist))
|
||||
rcu_segcblist_init(&rdp->cblist);
|
||||
rcu_segcblist_offload(&rdp->cblist, true);
|
||||
rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_CB);
|
||||
rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_GP);
|
||||
rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP);
|
||||
rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_RCU_CORE);
|
||||
}
|
||||
rcu_organize_nocb_kthreads();
|
||||
}
|
||||
@@ -1178,17 +1233,17 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
|
||||
* rcuo CB kthread, spawn it. Additionally, if the rcuo GP kthread
|
||||
* for this CPU's group has not yet been created, spawn it as well.
|
||||
*/
|
||||
static void rcu_spawn_one_nocb_kthread(int cpu)
|
||||
static void rcu_spawn_cpu_nocb_kthread(int cpu)
|
||||
{
|
||||
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
|
||||
struct rcu_data *rdp_gp;
|
||||
struct task_struct *t;
|
||||
|
||||
/*
|
||||
* If this isn't a no-CBs CPU or if it already has an rcuo kthread,
|
||||
* then nothing to do.
|
||||
*/
|
||||
if (!rcu_is_nocb_cpu(cpu) || rdp->nocb_cb_kthread)
|
||||
if (!rcu_scheduler_fully_active || !rcu_nocb_is_setup)
|
||||
return;
|
||||
|
||||
/* If there already is an rcuo kthread, then nothing to do. */
|
||||
if (rdp->nocb_cb_kthread)
|
||||
return;
|
||||
|
||||
/* If we didn't spawn the GP kthread first, reorganize! */
|
||||
@@ -1210,16 +1265,6 @@ static void rcu_spawn_one_nocb_kthread(int cpu)
|
||||
WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the specified CPU is a no-CBs CPU that does not already have its
|
||||
* rcuo kthread, spawn it.
|
||||
*/
|
||||
static void rcu_spawn_cpu_nocb_kthread(int cpu)
|
||||
{
|
||||
if (rcu_scheduler_fully_active)
|
||||
rcu_spawn_one_nocb_kthread(cpu);
|
||||
}
|
||||
|
||||
/*
|
||||
* Once the scheduler is running, spawn rcuo kthreads for all online
|
||||
* no-CBs CPUs. This assumes that the early_initcall()s happen before
|
||||
@@ -1230,8 +1275,10 @@ static void __init rcu_spawn_nocb_kthreads(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
for_each_online_cpu(cpu)
|
||||
rcu_spawn_cpu_nocb_kthread(cpu);
|
||||
if (rcu_nocb_is_setup) {
|
||||
for_each_online_cpu(cpu)
|
||||
rcu_spawn_cpu_nocb_kthread(cpu);
|
||||
}
|
||||
}
|
||||
|
||||
/* How many CB CPU IDs per GP kthread? Default of -1 for sqrt(nr_cpu_ids). */
|
||||
@@ -1251,7 +1298,6 @@ static void __init rcu_organize_nocb_kthreads(void)
|
||||
int nl = 0; /* Next GP kthread. */
|
||||
struct rcu_data *rdp;
|
||||
struct rcu_data *rdp_gp = NULL; /* Suppress misguided gcc warn. */
|
||||
struct rcu_data *rdp_prev = NULL;
|
||||
|
||||
if (!cpumask_available(rcu_nocb_mask))
|
||||
return;
|
||||
@@ -1265,14 +1311,14 @@ static void __init rcu_organize_nocb_kthreads(void)
|
||||
* Should the corresponding CPU come online in the future, then
|
||||
* we will spawn the needed set of rcu_nocb_kthread() kthreads.
|
||||
*/
|
||||
for_each_cpu(cpu, rcu_nocb_mask) {
|
||||
for_each_possible_cpu(cpu) {
|
||||
rdp = per_cpu_ptr(&rcu_data, cpu);
|
||||
if (rdp->cpu >= nl) {
|
||||
/* New GP kthread, set up for CBs & next GP. */
|
||||
gotnocbs = true;
|
||||
nl = DIV_ROUND_UP(rdp->cpu + 1, ls) * ls;
|
||||
rdp->nocb_gp_rdp = rdp;
|
||||
rdp_gp = rdp;
|
||||
INIT_LIST_HEAD(&rdp->nocb_head_rdp);
|
||||
if (dump_tree) {
|
||||
if (!firsttime)
|
||||
pr_cont("%s\n", gotnocbscbs
|
||||
@@ -1285,12 +1331,12 @@ static void __init rcu_organize_nocb_kthreads(void)
|
||||
} else {
|
||||
/* Another CB kthread, link to previous GP kthread. */
|
||||
gotnocbscbs = true;
|
||||
rdp->nocb_gp_rdp = rdp_gp;
|
||||
rdp_prev->nocb_next_cb_rdp = rdp;
|
||||
if (dump_tree)
|
||||
pr_cont(" %d", cpu);
|
||||
}
|
||||
rdp_prev = rdp;
|
||||
rdp->nocb_gp_rdp = rdp_gp;
|
||||
if (cpumask_test_cpu(cpu, rcu_nocb_mask))
|
||||
list_add_tail(&rdp->nocb_entry_rdp, &rdp_gp->nocb_head_rdp);
|
||||
}
|
||||
if (gotnocbs && dump_tree)
|
||||
pr_cont("%s\n", gotnocbscbs ? "" : " (self only)");
|
||||
@@ -1352,6 +1398,7 @@ static void show_rcu_nocb_state(struct rcu_data *rdp)
|
||||
{
|
||||
char bufw[20];
|
||||
char bufr[20];
|
||||
struct rcu_data *nocb_next_rdp;
|
||||
struct rcu_segcblist *rsclp = &rdp->cblist;
|
||||
bool waslocked;
|
||||
bool wassleep;
|
||||
@@ -1359,11 +1406,16 @@ static void show_rcu_nocb_state(struct rcu_data *rdp)
|
||||
if (rdp->nocb_gp_rdp == rdp)
|
||||
show_rcu_nocb_gp_state(rdp);
|
||||
|
||||
nocb_next_rdp = list_next_or_null_rcu(&rdp->nocb_gp_rdp->nocb_head_rdp,
|
||||
&rdp->nocb_entry_rdp,
|
||||
typeof(*rdp),
|
||||
nocb_entry_rdp);
|
||||
|
||||
sprintf(bufw, "%ld", rsclp->gp_seq[RCU_WAIT_TAIL]);
|
||||
sprintf(bufr, "%ld", rsclp->gp_seq[RCU_NEXT_READY_TAIL]);
|
||||
pr_info(" CB %d^%d->%d %c%c%c%c%c%c F%ld L%ld C%d %c%c%s%c%s%c%c q%ld %c CPU %d%s\n",
|
||||
rdp->cpu, rdp->nocb_gp_rdp->cpu,
|
||||
rdp->nocb_next_cb_rdp ? rdp->nocb_next_cb_rdp->cpu : -1,
|
||||
nocb_next_rdp ? nocb_next_rdp->cpu : -1,
|
||||
"kK"[!!rdp->nocb_cb_kthread],
|
||||
"bB"[raw_spin_is_locked(&rdp->nocb_bypass_lock)],
|
||||
"cC"[!!atomic_read(&rdp->nocb_lock_contended)],
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
static bool rcu_rdp_is_offloaded(struct rcu_data *rdp)
|
||||
{
|
||||
/*
|
||||
* In order to read the offloaded state of an rdp is a safe
|
||||
* In order to read the offloaded state of an rdp in a safe
|
||||
* and stable way and prevent its value from being changed
|
||||
* under us, we must either hold the barrier mutex, the cpu
|
||||
* hotplug lock (read or write) or the nocb lock. Local
|
||||
@@ -51,12 +51,10 @@ static void __init rcu_bootup_announce_oddness(void)
|
||||
RCU_FANOUT);
|
||||
if (rcu_fanout_exact)
|
||||
pr_info("\tHierarchical RCU autobalancing is disabled.\n");
|
||||
if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ))
|
||||
pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
|
||||
if (IS_ENABLED(CONFIG_PROVE_RCU))
|
||||
pr_info("\tRCU lockdep checking is enabled.\n");
|
||||
if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
|
||||
pr_info("\tRCU strict (and thus non-scalable) grace periods enabled.\n");
|
||||
pr_info("\tRCU strict (and thus non-scalable) grace periods are enabled.\n");
|
||||
if (RCU_NUM_LVLS >= 4)
|
||||
pr_info("\tFour(or more)-level hierarchy is enabled.\n");
|
||||
if (RCU_FANOUT_LEAF != 16)
|
||||
@@ -88,13 +86,13 @@ static void __init rcu_bootup_announce_oddness(void)
|
||||
if (rcu_kick_kthreads)
|
||||
pr_info("\tKick kthreads if too-long grace period.\n");
|
||||
if (IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD))
|
||||
pr_info("\tRCU callback double-/use-after-free debug enabled.\n");
|
||||
pr_info("\tRCU callback double-/use-after-free debug is enabled.\n");
|
||||
if (gp_preinit_delay)
|
||||
pr_info("\tRCU debug GP pre-init slowdown %d jiffies.\n", gp_preinit_delay);
|
||||
if (gp_init_delay)
|
||||
pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
|
||||
if (gp_cleanup_delay)
|
||||
pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay);
|
||||
pr_info("\tRCU debug GP cleanup slowdown %d jiffies.\n", gp_cleanup_delay);
|
||||
if (!use_softirq)
|
||||
pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
|
||||
if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
|
||||
@@ -260,10 +258,10 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
|
||||
* no need to check for a subsequent expedited GP. (Though we are
|
||||
* still in a quiescent state in any case.)
|
||||
*/
|
||||
if (blkd_state & RCU_EXP_BLKD && rdp->exp_deferred_qs)
|
||||
if (blkd_state & RCU_EXP_BLKD && rdp->cpu_no_qs.b.exp)
|
||||
rcu_report_exp_rdp(rdp);
|
||||
else
|
||||
WARN_ON_ONCE(rdp->exp_deferred_qs);
|
||||
WARN_ON_ONCE(rdp->cpu_no_qs.b.exp);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -277,12 +275,16 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
|
||||
* current task, there might be any number of other tasks blocked while
|
||||
* in an RCU read-side critical section.
|
||||
*
|
||||
* Unlike non-preemptible-RCU, quiescent state reports for expedited
|
||||
* grace periods are handled separately via deferred quiescent states
|
||||
* and context switch events.
|
||||
*
|
||||
* Callers to this function must disable preemption.
|
||||
*/
|
||||
static void rcu_qs(void)
|
||||
{
|
||||
RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!\n");
|
||||
if (__this_cpu_read(rcu_data.cpu_no_qs.s)) {
|
||||
if (__this_cpu_read(rcu_data.cpu_no_qs.b.norm)) {
|
||||
trace_rcu_grace_period(TPS("rcu_preempt"),
|
||||
__this_cpu_read(rcu_data.gp_seq),
|
||||
TPS("cpuqs"));
|
||||
@@ -350,7 +352,7 @@ void rcu_note_context_switch(bool preempt)
|
||||
* means that we continue to block the current grace period.
|
||||
*/
|
||||
rcu_qs();
|
||||
if (rdp->exp_deferred_qs)
|
||||
if (rdp->cpu_no_qs.b.exp)
|
||||
rcu_report_exp_rdp(rdp);
|
||||
rcu_tasks_qs(current, preempt);
|
||||
trace_rcu_utilization(TPS("End context switch"));
|
||||
@@ -477,7 +479,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
|
||||
*/
|
||||
special = t->rcu_read_unlock_special;
|
||||
rdp = this_cpu_ptr(&rcu_data);
|
||||
if (!special.s && !rdp->exp_deferred_qs) {
|
||||
if (!special.s && !rdp->cpu_no_qs.b.exp) {
|
||||
local_irq_restore(flags);
|
||||
return;
|
||||
}
|
||||
@@ -497,7 +499,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
|
||||
* tasks are handled when removing the task from the
|
||||
* blocked-tasks list below.
|
||||
*/
|
||||
if (rdp->exp_deferred_qs)
|
||||
if (rdp->cpu_no_qs.b.exp)
|
||||
rcu_report_exp_rdp(rdp);
|
||||
|
||||
/* Clean up if blocked during RCU read-side critical section. */
|
||||
@@ -580,7 +582,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
|
||||
*/
|
||||
static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
|
||||
{
|
||||
return (__this_cpu_read(rcu_data.exp_deferred_qs) ||
|
||||
return (__this_cpu_read(rcu_data.cpu_no_qs.b.exp) ||
|
||||
READ_ONCE(t->rcu_read_unlock_special.s)) &&
|
||||
rcu_preempt_depth() == 0;
|
||||
}
|
||||
@@ -642,7 +644,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
|
||||
(IS_ENABLED(CONFIG_RCU_BOOST) && irqs_were_disabled &&
|
||||
t->rcu_blocked_node);
|
||||
// Need to defer quiescent state until everything is enabled.
|
||||
if (use_softirq && (in_irq() || (expboost && !irqs_were_disabled))) {
|
||||
if (use_softirq && (in_hardirq() || (expboost && !irqs_were_disabled))) {
|
||||
// Using softirq, safe to awaken, and either the
|
||||
// wakeup is free or there is either an expedited
|
||||
// GP in flight or a potential need to deboost.
|
||||
@@ -845,10 +847,8 @@ static void rcu_qs(void)
|
||||
trace_rcu_grace_period(TPS("rcu_sched"),
|
||||
__this_cpu_read(rcu_data.gp_seq), TPS("cpuqs"));
|
||||
__this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);
|
||||
if (!__this_cpu_read(rcu_data.cpu_no_qs.b.exp))
|
||||
return;
|
||||
__this_cpu_write(rcu_data.cpu_no_qs.b.exp, false);
|
||||
rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
|
||||
if (__this_cpu_read(rcu_data.cpu_no_qs.b.exp))
|
||||
rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -925,7 +925,18 @@ static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static void rcu_preempt_deferred_qs(struct task_struct *t) { }
|
||||
|
||||
// Except that we do need to respond to a request by an expedited grace
|
||||
// period for a quiescent state from this CPU. Note that requests from
|
||||
// tasks are handled when removing the task from the blocked-tasks list
|
||||
// below.
|
||||
static void rcu_preempt_deferred_qs(struct task_struct *t)
|
||||
{
|
||||
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
|
||||
|
||||
if (rdp->cpu_no_qs.b.exp)
|
||||
rcu_report_exp_rdp(rdp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Because there is no preemptible RCU, there can be no readers blocked,
|
||||
@@ -1153,7 +1164,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
|
||||
/*
|
||||
* Create an RCU-boost kthread for the specified node if one does not
|
||||
* already exist. We only create this kthread for preemptible RCU.
|
||||
* Returns zero if all is well, a negated errno otherwise.
|
||||
*/
|
||||
static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
|
||||
{
|
||||
@@ -1204,8 +1214,9 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
|
||||
if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
|
||||
cpu != outgoingcpu)
|
||||
cpumask_set_cpu(cpu, cm);
|
||||
cpumask_and(cm, cm, housekeeping_cpumask(HK_FLAG_RCU));
|
||||
if (cpumask_weight(cm) == 0)
|
||||
cpumask_setall(cm);
|
||||
cpumask_copy(cm, housekeeping_cpumask(HK_FLAG_RCU));
|
||||
set_cpus_allowed_ptr(t, cm);
|
||||
free_cpumask_var(cm);
|
||||
}
|
||||
@@ -1253,201 +1264,6 @@ static void __init rcu_spawn_boost_kthreads(void)
|
||||
|
||||
#endif /* #else #ifdef CONFIG_RCU_BOOST */
|
||||
|
||||
#if !defined(CONFIG_RCU_FAST_NO_HZ)
|
||||
|
||||
/*
|
||||
* Check to see if any future non-offloaded RCU-related work will need
|
||||
* to be done by the current CPU, even if none need be done immediately,
|
||||
* returning 1 if so. This function is part of the RCU implementation;
|
||||
* it is -not- an exported member of the RCU API.
|
||||
*
|
||||
* Because we do not have RCU_FAST_NO_HZ, just check whether or not this
|
||||
* CPU has RCU callbacks queued.
|
||||
*/
|
||||
/*
 * Tell the caller whether this CPU still has RCU callbacks queued that
 * are not offloaded.  @basemono is unused in this variant, and *@nextevt
 * is unconditionally set to KTIME_MAX.  Returns 1 if such callbacks
 * exist, 0 otherwise.
 */
int rcu_needs_cpu(u64 basemono, u64 *nextevt)
{
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);

	*nextevt = KTIME_MAX;

	/* An empty callback list never needs the CPU. */
	if (rcu_segcblist_empty(&rdp->cblist))
		return 0;

	/* Queued callbacks matter only when this rdp is not offloaded. */
	return !rcu_rdp_is_offloaded(rdp);
}
|
||||
|
||||
/*
|
||||
* Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
|
||||
* after it.
|
||||
*/
|
||||
static void rcu_cleanup_after_idle(void)
{
	/* Deliberately empty: this variant has no idle-exit work to do. */
}
|
||||
|
||||
/*
|
||||
* Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n,
|
||||
* is nothing.
|
||||
*/
|
||||
static void rcu_prepare_for_idle(void)
{
	/* Deliberately empty: this variant has no idle-entry work to do. */
}
|
||||
|
||||
#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
|
||||
|
||||
/*
|
||||
* This code is invoked when a CPU goes idle, at which point we want
|
||||
* to have the CPU do everything required for RCU so that it can enter
|
||||
* the energy-efficient dyntick-idle mode.
|
||||
*
|
||||
* The following preprocessor symbol controls this:
|
||||
*
|
||||
* RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted
|
||||
* to sleep in dyntick-idle mode with RCU callbacks pending. This
|
||||
* is sized to be roughly one RCU grace period. Those energy-efficiency
|
||||
* benchmarkers who might otherwise be tempted to set this to a large
|
||||
* number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your
|
||||
* system. And if you are -that- concerned about energy efficiency,
|
||||
* just power the system down and be done with it!
|
||||
*
|
||||
* The value below works well in practice. If future workloads require
|
||||
* adjustment, they can be converted into kernel config parameters, though
|
||||
* making the state machine smarter might be a better option.
|
||||
*/
|
||||
#define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */
|
||||
|
||||
static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY;
|
||||
module_param(rcu_idle_gp_delay, int, 0644);
|
||||
|
||||
/*
|
||||
* Try to advance callbacks on the current CPU, but only if it has been
|
||||
* awhile since the last time we did so. Afterwards, if there are any
|
||||
* callbacks ready for immediate invocation, return true.
|
||||
*/
|
||||
static bool __maybe_unused rcu_try_advance_all_cbs(void)
{
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
	struct rcu_node *leaf;
	bool gp_moved;

	/* At most one attempt per jiffy. */
	if (rdp->last_advance_all == jiffies)
		return false;
	rdp->last_advance_all = jiffies;

	leaf = rdp->mynode;

	/*
	 * Only call note_gp_changes() when a grace period has completed
	 * since our snapshot (or ->gpwrap is set) and some callbacks are
	 * still pending.
	 */
	gp_moved = rcu_seq_completed_gp(rdp->gp_seq,
					rcu_seq_current(&leaf->gp_seq)) ||
		   unlikely(READ_ONCE(rdp->gpwrap));
	if (gp_moved && rcu_segcblist_pend_cbs(&rdp->cblist))
		note_gp_changes(rdp);

	/* Report whether anything is now ready to be invoked. */
	return rcu_segcblist_ready_cbs(&rdp->cblist);
}
|
||||
|
||||
/*
|
||||
* Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
|
||||
* to invoke. If the CPU has callbacks, try to advance them. Tell the
|
||||
* caller what timeout to set.
|
||||
*
|
||||
* The caller must have disabled interrupts.
|
||||
*/
|
||||
/*
 * Decide whether RCU still needs this CPU before it goes idle.
 *
 * Returns 1 (after requesting RCU core processing) when callbacks became
 * ready to invoke; *@nextevt is left untouched in that case.  Otherwise
 * returns 0 with *@nextevt set either to KTIME_MAX (no non-offloaded
 * callbacks) or to @basemono plus a rounded delay derived from
 * rcu_idle_gp_delay.  Interrupts must be disabled by the caller.
 */
int rcu_needs_cpu(u64 basemono, u64 *nextevt)
{
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
	unsigned long delay_jiffies;
	bool nothing_queued;

	lockdep_assert_irqs_disabled();

	/* With no non-offloaded callbacks, RCU has no use for this CPU. */
	nothing_queued = rcu_segcblist_empty(&rdp->cblist) ||
			 rcu_rdp_is_offloaded(rdp);
	if (nothing_queued) {
		*nextevt = KTIME_MAX;
		return 0;
	}

	/* Try advancing; if something is ready, schedule its invocation. */
	if (rcu_try_advance_all_cbs()) {
		invoke_rcu_core();
		return 1;
	}
	rdp->last_accelerate = jiffies;

	/* Ask for a timer, rounded up to a multiple of the idle delay. */
	delay_jiffies = round_up(rcu_idle_gp_delay + jiffies, rcu_idle_gp_delay) - jiffies;
	*nextevt = basemono + delay_jiffies * TICK_NSEC;

	return 0;
}
|
||||
|
||||
/*
|
||||
* Prepare a CPU for idle from an RCU perspective. The first major task is to
|
||||
* sense whether nohz mode has been enabled or disabled via sysfs. The second
|
||||
* major task is to accelerate (that is, assign grace-period numbers to) any
|
||||
* recently arrived callbacks.
|
||||
*
|
||||
* The caller must have disabled interrupts.
|
||||
*/
|
||||
static void rcu_prepare_for_idle(void)
|
||||
{
|
||||
bool needwake;
|
||||
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
|
||||
struct rcu_node *rnp;
|
||||
int tne;
|
||||
|
||||
lockdep_assert_irqs_disabled();
|
||||
if (rcu_rdp_is_offloaded(rdp))
|
||||
return;
|
||||
|
||||
/* Handle nohz enablement switches conservatively. */
|
||||
tne = READ_ONCE(tick_nohz_active);
|
||||
if (tne != rdp->tick_nohz_enabled_snap) {
|
||||
if (!rcu_segcblist_empty(&rdp->cblist))
|
||||
invoke_rcu_core(); /* force nohz to see update. */
|
||||
rdp->tick_nohz_enabled_snap = tne;
|
||||
return;
|
||||
}
|
||||
if (!tne)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If we have not yet accelerated this jiffy, accelerate all
|
||||
* callbacks on this CPU.
|
||||
*/
|
||||
if (rdp->last_accelerate == jiffies)
|
||||
return;
|
||||
rdp->last_accelerate = jiffies;
|
||||
if (rcu_segcblist_pend_cbs(&rdp->cblist)) {
|
||||
rnp = rdp->mynode;
|
||||
raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
|
||||
needwake = rcu_accelerate_cbs(rnp, rdp);
|
||||
raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
|
||||
if (needwake)
|
||||
rcu_gp_kthread_wake();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Clean up for exit from idle. Attempt to advance callbacks based on
|
||||
* any grace periods that elapsed while the CPU was idle, and if any
|
||||
* callbacks are now ready to invoke, initiate invocation.
|
||||
*/
|
||||
static void rcu_cleanup_after_idle(void)
|
||||
{
|
||||
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
|
||||
|
||||
lockdep_assert_irqs_disabled();
|
||||
if (rcu_rdp_is_offloaded(rdp))
|
||||
return;
|
||||
if (rcu_try_advance_all_cbs())
|
||||
invoke_rcu_core();
|
||||
}
|
||||
|
||||
#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
|
||||
|
||||
/*
|
||||
* Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the
|
||||
* grace-period kthread will do force_quiescent_state() processing?
|
||||
@@ -1455,7 +1271,7 @@ static void rcu_cleanup_after_idle(void)
|
||||
* CPU unless the grace period has extended for too long.
|
||||
*
|
||||
* This code relies on the fact that all NO_HZ_FULL CPUs are also
|
||||
* CONFIG_RCU_NOCB_CPU CPUs.
|
||||
* RCU_NOCB_CPU CPUs.
|
||||
*/
|
||||
static bool rcu_nohz_full_cpu(void)
|
||||
{
|
||||
|
||||
@@ -347,26 +347,6 @@ static void rcu_dump_cpu_stacks(void)
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_RCU_FAST_NO_HZ
|
||||
|
||||
static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
|
||||
{
|
||||
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
|
||||
|
||||
sprintf(cp, "last_accelerate: %04lx/%04lx dyntick_enabled: %d",
|
||||
rdp->last_accelerate & 0xffff, jiffies & 0xffff,
|
||||
!!rdp->tick_nohz_enabled_snap);
|
||||
}
|
||||
|
||||
#else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
|
||||
|
||||
/*
 * Stub variant: writes an empty string to @cp so the caller has nothing
 * extra to print.  @cpu is unused.
 */
static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
{
	cp[0] = '\0';
}
|
||||
|
||||
#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */
|
||||
|
||||
static const char * const gp_state_names[] = {
|
||||
[RCU_GP_IDLE] = "RCU_GP_IDLE",
|
||||
[RCU_GP_WAIT_GPS] = "RCU_GP_WAIT_GPS",
|
||||
@@ -408,13 +388,12 @@ static bool rcu_is_gp_kthread_starving(unsigned long *jp)
|
||||
* of RCU grace periods that this CPU is ignorant of, for example, "1"
|
||||
* if the CPU was aware of the previous grace period.
|
||||
*
|
||||
* Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info.
|
||||
* Also print out idle info.
|
||||
*/
|
||||
static void print_cpu_stall_info(int cpu)
|
||||
{
|
||||
unsigned long delta;
|
||||
bool falsepositive;
|
||||
char fast_no_hz[72];
|
||||
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
|
||||
char *ticks_title;
|
||||
unsigned long ticks_value;
|
||||
@@ -432,11 +411,10 @@ static void print_cpu_stall_info(int cpu)
|
||||
ticks_title = "ticks this GP";
|
||||
ticks_value = rdp->ticks_this_gp;
|
||||
}
|
||||
print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
|
||||
delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq);
|
||||
falsepositive = rcu_is_gp_kthread_starving(NULL) &&
|
||||
rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp));
|
||||
pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s%s\n",
|
||||
pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s\n",
|
||||
cpu,
|
||||
"O."[!!cpu_online(cpu)],
|
||||
"o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)],
|
||||
@@ -449,7 +427,6 @@ static void print_cpu_stall_info(int cpu)
|
||||
rdp->dynticks_nesting, rdp->dynticks_nmi_nesting,
|
||||
rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
|
||||
data_race(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart,
|
||||
fast_no_hz,
|
||||
falsepositive ? " (false positive?)" : "");
|
||||
}
|
||||
|
||||
|
||||
@@ -38,14 +38,10 @@
|
||||
#define SCFTORT_STRING "scftorture"
|
||||
#define SCFTORT_FLAG SCFTORT_STRING ": "
|
||||
|
||||
#define SCFTORTOUT(s, x...) \
|
||||
pr_alert(SCFTORT_FLAG s, ## x)
|
||||
|
||||
#define VERBOSE_SCFTORTOUT(s, x...) \
|
||||
do { if (verbose) pr_alert(SCFTORT_FLAG s, ## x); } while (0)
|
||||
do { if (verbose) pr_alert(SCFTORT_FLAG s "\n", ## x); } while (0)
|
||||
|
||||
#define VERBOSE_SCFTORTOUT_ERRSTRING(s, x...) \
|
||||
do { if (verbose) pr_alert(SCFTORT_FLAG "!!! " s, ## x); } while (0)
|
||||
#define SCFTORTOUT_ERRSTRING(s, x...) pr_alert(SCFTORT_FLAG "!!! " s "\n", ## x)
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Paul E. McKenney <paulmck@kernel.org>");
|
||||
@@ -587,14 +583,14 @@ static int __init scf_torture_init(void)
|
||||
if (weight_resched1 == 0 && weight_single1 == 0 && weight_single_rpc1 == 0 &&
|
||||
weight_single_wait1 == 0 && weight_many1 == 0 && weight_many_wait1 == 0 &&
|
||||
weight_all1 == 0 && weight_all_wait1 == 0) {
|
||||
VERBOSE_SCFTORTOUT_ERRSTRING("all zero weights makes no sense");
|
||||
SCFTORTOUT_ERRSTRING("all zero weights makes no sense");
|
||||
firsterr = -EINVAL;
|
||||
goto unwind;
|
||||
}
|
||||
if (IS_BUILTIN(CONFIG_SCF_TORTURE_TEST))
|
||||
scf_sel_add(weight_resched1, SCF_PRIM_RESCHED, false);
|
||||
else if (weight_resched1)
|
||||
VERBOSE_SCFTORTOUT_ERRSTRING("built as module, weight_resched ignored");
|
||||
SCFTORTOUT_ERRSTRING("built as module, weight_resched ignored");
|
||||
scf_sel_add(weight_single1, SCF_PRIM_SINGLE, false);
|
||||
scf_sel_add(weight_single_rpc1, SCF_PRIM_SINGLE_RPC, true);
|
||||
scf_sel_add(weight_single_wait1, SCF_PRIM_SINGLE, true);
|
||||
@@ -625,12 +621,12 @@ static int __init scf_torture_init(void)
|
||||
nthreads = num_online_cpus();
|
||||
scf_stats_p = kcalloc(nthreads, sizeof(scf_stats_p[0]), GFP_KERNEL);
|
||||
if (!scf_stats_p) {
|
||||
VERBOSE_SCFTORTOUT_ERRSTRING("out of memory");
|
||||
SCFTORTOUT_ERRSTRING("out of memory");
|
||||
firsterr = -ENOMEM;
|
||||
goto unwind;
|
||||
}
|
||||
|
||||
VERBOSE_SCFTORTOUT("Starting %d smp_call_function() threads\n", nthreads);
|
||||
VERBOSE_SCFTORTOUT("Starting %d smp_call_function() threads", nthreads);
|
||||
|
||||
atomic_set(&n_started, nthreads);
|
||||
for (i = 0; i < nthreads; i++) {
|
||||
|
||||
@@ -570,7 +570,7 @@ int torture_shuffle_init(long shuffint)
|
||||
shuffle_idle_cpu = -1;
|
||||
|
||||
if (!alloc_cpumask_var(&shuffle_tmp_mask, GFP_KERNEL)) {
|
||||
VERBOSE_TOROUT_ERRSTRING("Failed to alloc mask");
|
||||
TOROUT_ERRSTRING("Failed to alloc mask");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
@@ -934,7 +934,7 @@ int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m,
|
||||
*tp = kthread_run(fn, arg, "%s", s);
|
||||
if (IS_ERR(*tp)) {
|
||||
ret = PTR_ERR(*tp);
|
||||
VERBOSE_TOROUT_ERRSTRING(f);
|
||||
TOROUT_ERRSTRING(f);
|
||||
*tp = NULL;
|
||||
}
|
||||
torture_shuffle_task_register(*tp);
|
||||
|
||||
Reference in New Issue
Block a user