Merge branch 'for-4.5-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
Pull workqueue fixes from Tejun Heo:
 "Workqueue fixes for v4.5-rc3.

  - Remove a spurious triggering of flush dependency warning.

  - Officially break local execution guarantee of unbound work items
    and add a debug feature to flush out usages which depend on it.

  - Work around CPU -> NODE mapping becoming invalid on CPU offline.

    The branch is young but pushing out early as stable kernels are
    being affected"

* 'for-4.5-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
  workqueue: handle NUMA_NO_NODE for unbound pool_workqueue lookup
  workqueue: implement "workqueue.debug_force_rr_cpu" debug feature
  workqueue: schedule WORK_CPU_UNBOUND work on wq_unbound_cpumask CPUs
  Revert "workqueue: make sure delayed work run in local cpu"
  workqueue: skip flush dependency checks for legacy workqueues
commit 9aece75c13
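The behavioural change called out in the pull message, that queueing without an explicit CPU no longer guarantees local execution, is the one most likely to affect existing callers. Below is a minimal sketch, not part of this merge, of how a caller that genuinely depends on running on the queueing CPU can keep that behaviour by pinning the item explicitly; the work item and function names are hypothetical.

#include <linux/smp.h>
#include <linux/workqueue.h>

/* hypothetical work item that must run on the CPU that queued it */
static void my_local_fn(struct work_struct *work)
{
	/* touches per-CPU state, so it relies on the explicit pinning below */
}
static DECLARE_WORK(my_local_work, my_local_fn);

static void kick_local_work(void)
{
	/*
	 * queue_work()/schedule_work() may now pick another CPU from
	 * wq_unbound_cpumask; queue_work_on() pins the item to a specific
	 * CPU instead, preserving the old implicit behaviour.
	 */
	queue_work_on(raw_smp_processor_id(), system_wq, &my_local_work);
}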
Documentation/kernel-parameters.txt
@@ -4235,6 +4235,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			The default value of this parameter is determined by
 			the config option CONFIG_WQ_POWER_EFFICIENT_DEFAULT.
 
+	workqueue.debug_force_rr_cpu
+			Workqueue used to implicitly guarantee that work
+			items queued without explicit CPU specified are put
+			on the local CPU. This guarantee is no longer true
+			and while local CPU is still preferred work items
+			may be put on foreign CPUs. This debug option
+			forces round-robin CPU selection to flush out
+			usages which depend on the now broken guarantee.
+			When enabled, memory and cache locality will be
+			impacted.
+
 	x2apic_phys	[X86-64,APIC] Use x2apic physical mode instead of
 			default x2apic cluster mode on platforms
 			supporting x2apic.
include/linux/workqueue.h
@@ -311,6 +311,7 @@ enum {
 	__WQ_DRAINING		= 1 << 16, /* internal: workqueue is draining */
 	__WQ_ORDERED		= 1 << 17, /* internal: workqueue is ordered */
+	__WQ_LEGACY		= 1 << 18, /* internal: create*_workqueue() */
 
 	WQ_MAX_ACTIVE		= 512,	  /* I like 512, better ideas? */
 	WQ_MAX_UNBOUND_PER_CPU	= 4,	  /* 4 * #cpus for unbound wq */
@@ -411,12 +412,12 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
 	alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)
 
 #define create_workqueue(name)						\
-	alloc_workqueue("%s", WQ_MEM_RECLAIM, 1, (name))
+	alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name))
 #define create_freezable_workqueue(name)				\
-	alloc_workqueue("%s", WQ_FREEZABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, \
-			1, (name))
+	alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND | \
+			WQ_MEM_RECLAIM, 1, (name))
 #define create_singlethread_workqueue(name)				\
-	alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, name)
+	alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name)
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
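The __WQ_LEGACY flag added above is set only by the create*_workqueue() wrappers, which is what lets the flush-dependency check later in this merge skip those historical users while still warning for explicit alloc_workqueue() callers. A hedged sketch contrasting the two allocation styles follows; the queue names are made up for illustration.

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/workqueue.h>

static struct workqueue_struct *legacy_wq, *modern_wq;

static int __init wq_example_init(void)
{
	/* wrapper expands to alloc_workqueue() with __WQ_LEGACY | WQ_MEM_RECLAIM,
	 * so it is exempted from the flush-dependency warning */
	legacy_wq = create_workqueue("example_legacy");
	if (!legacy_wq)
		return -ENOMEM;

	/* explicit allocation: no __WQ_LEGACY, so a worker of this wq flushing
	 * a !WQ_MEM_RECLAIM workqueue still triggers the warning */
	modern_wq = alloc_workqueue("example_modern", WQ_MEM_RECLAIM, 0);
	if (!modern_wq) {
		destroy_workqueue(legacy_wq);
		return -ENOMEM;
	}
	return 0;
}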
kernel/workqueue.c
@@ -301,7 +301,23 @@ static DEFINE_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */
 static LIST_HEAD(workqueues);		/* PR: list of all workqueues */
 static bool workqueue_freezing;		/* PL: have wqs started freezing? */
 
-static cpumask_var_t wq_unbound_cpumask; /* PL: low level cpumask for all unbound wqs */
+/* PL: allowable cpus for unbound wqs and work items */
+static cpumask_var_t wq_unbound_cpumask;
+
+/* CPU where unbound work was last round robin scheduled from this CPU */
+static DEFINE_PER_CPU(int, wq_rr_cpu_last);
+
+/*
+ * Local execution of unbound work items is no longer guaranteed. The
+ * following always forces round-robin CPU selection on unbound work items
+ * to uncover usages which depend on it.
+ */
+#ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
+static bool wq_debug_force_rr_cpu = true;
+#else
+static bool wq_debug_force_rr_cpu = false;
+#endif
+module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);
 
 /* the per-cpu worker pools */
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
@@ -570,6 +586,16 @@ static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
 						  int node)
 {
 	assert_rcu_or_wq_mutex_or_pool_mutex(wq);
+
+	/*
+	 * XXX: @node can be NUMA_NO_NODE if CPU goes offline while a
+	 * delayed item is pending. The plan is to keep CPU -> NODE
+	 * mapping valid and stable across CPU on/offlines. Once that
+	 * happens, this workaround can be removed.
+	 */
+	if (unlikely(node == NUMA_NO_NODE))
+		return wq->dfl_pwq;
+
 	return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
 }
 
@@ -1298,6 +1324,39 @@ static bool is_chained_work(struct workqueue_struct *wq)
 	return worker && worker->current_pwq->wq == wq;
 }
 
+/*
+ * When queueing an unbound work item to a wq, prefer local CPU if allowed
+ * by wq_unbound_cpumask. Otherwise, round robin among the allowed ones to
+ * avoid perturbing sensitive tasks.
+ */
+static int wq_select_unbound_cpu(int cpu)
+{
+	static bool printed_dbg_warning;
+	int new_cpu;
+
+	if (likely(!wq_debug_force_rr_cpu)) {
+		if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
+			return cpu;
+	} else if (!printed_dbg_warning) {
+		pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n");
+		printed_dbg_warning = true;
+	}
+
+	if (cpumask_empty(wq_unbound_cpumask))
+		return cpu;
+
+	new_cpu = __this_cpu_read(wq_rr_cpu_last);
+	new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
+	if (unlikely(new_cpu >= nr_cpu_ids)) {
+		new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
+		if (unlikely(new_cpu >= nr_cpu_ids))
+			return cpu;
+	}
+	__this_cpu_write(wq_rr_cpu_last, new_cpu);
+
+	return new_cpu;
+}
+
 static void __queue_work(int cpu, struct workqueue_struct *wq,
 			 struct work_struct *work)
 {
@@ -1323,7 +1382,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
 		return;
 retry:
 	if (req_cpu == WORK_CPU_UNBOUND)
-		cpu = raw_smp_processor_id();
+		cpu = wq_select_unbound_cpu(raw_smp_processor_id());
 
 	/* pwq which will be used unless @work is executing elsewhere */
 	if (!(wq->flags & WQ_UNBOUND))
@@ -1464,13 +1523,13 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
 	timer_stats_timer_set_start_info(&dwork->timer);
 
 	dwork->wq = wq;
-	/* timer isn't guaranteed to run in this cpu, record earlier */
-	if (cpu == WORK_CPU_UNBOUND)
-		cpu = raw_smp_processor_id();
 	dwork->cpu = cpu;
 	timer->expires = jiffies + delay;
 
-	add_timer_on(timer, cpu);
+	if (unlikely(cpu != WORK_CPU_UNBOUND))
+		add_timer_on(timer, cpu);
+	else
+		add_timer(timer);
 }
 
 /**
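The hunk above restores the earlier behaviour: a delayed work item queued with WORK_CPU_UNBOUND now arms a plain add_timer(), so the timer, and therefore the work item, may fire on any CPU. A minimal sketch, with purely illustrative names, of how a caller that still wants its delayed work tied to a particular CPU can request that explicitly.

#include <linux/jiffies.h>
#include <linux/workqueue.h>

/* hypothetical delayed work handler */
static void my_delayed_fn(struct work_struct *work)
{
	/* runs on the CPU passed to queue_delayed_work_on() below */
}
static DECLARE_DELAYED_WORK(my_dwork, my_delayed_fn);

static void arm_delayed_on_cpu(int cpu)
{
	/* pin both the timer and the resulting work item to @cpu instead of
	 * relying on the implicit local-CPU behaviour removed by this merge */
	queue_delayed_work_on(cpu, system_wq, &my_dwork, msecs_to_jiffies(100));
}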
@@ -2355,7 +2414,8 @@ static void check_flush_dependency(struct workqueue_struct *target_wq,
 	WARN_ONCE(current->flags & PF_MEMALLOC,
 		  "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
 		  current->pid, current->comm, target_wq->name, target_func);
-	WARN_ONCE(worker && (worker->current_pwq->wq->flags & WQ_MEM_RECLAIM),
+	WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
+			      (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
 		  "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
 		  worker->current_pwq->wq->name, worker->current_func,
 		  target_wq->name, target_func);
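To make the adjusted condition concrete: the warning fires when a work item running on a WQ_MEM_RECLAIM workqueue flushes a workqueue without WQ_MEM_RECLAIM, which can stall under memory pressure; queues carrying __WQ_LEGACY are now exempted because the create_workqueue() wrappers set WQ_MEM_RECLAIM whether or not the user opted into it. A hedged sketch of the pattern being caught, with hypothetical names throughout.

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/workqueue.h>

static struct workqueue_struct *reclaim_wq;	/* allocated with WQ_MEM_RECLAIM */
static struct workqueue_struct *plain_wq;	/* no WQ_MEM_RECLAIM */

static void reclaim_work_fn(struct work_struct *work)
{
	/*
	 * A WQ_MEM_RECLAIM worker flushing a !WQ_MEM_RECLAIM workqueue is
	 * what the WARN_ONCE() above reports, unless the flushing wq also
	 * carries __WQ_LEGACY.
	 */
	flush_workqueue(plain_wq);
}
static DECLARE_WORK(reclaim_work, reclaim_work_fn);

static int __init flush_dep_example_init(void)
{
	reclaim_wq = alloc_workqueue("example_reclaim", WQ_MEM_RECLAIM, 0);
	if (!reclaim_wq)
		return -ENOMEM;
	plain_wq = alloc_workqueue("example_plain", 0, 0);
	if (!plain_wq) {
		destroy_workqueue(reclaim_wq);
		return -ENOMEM;
	}

	queue_work(reclaim_wq, &reclaim_work);
	return 0;
}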
lib/Kconfig.debug
@@ -1400,6 +1400,21 @@ config RCU_EQS_DEBUG
 
 endmenu # "RCU Debugging"
 
+config DEBUG_WQ_FORCE_RR_CPU
+	bool "Force round-robin CPU selection for unbound work items"
+	depends on DEBUG_KERNEL
+	default n
+	help
+	  Workqueue used to implicitly guarantee that work items queued
+	  without explicit CPU specified are put on the local CPU. This
+	  guarantee is no longer true and while local CPU is still
+	  preferred work items may be put on foreign CPUs. Kernel
+	  parameter "workqueue.debug_force_rr_cpu" is added to force
+	  round-robin CPU selection to flush out usages which depend on the
+	  now broken guarantee. This config option enables the debug
+	  feature by default. When enabled, memory and cache locality will
+	  be impacted.
+
 config DEBUG_BLOCK_EXT_DEVT
 	bool "Force extended block device numbers and spread them"
 	depends on DEBUG_KERNEL