Merge branch 'for-4.5-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
Pull workqueue fixes from Tejun Heo:
 "Workqueue fixes for v4.5-rc3.

  - Remove a spurious triggering of flush dependency warning.

  - Officially break local execution guarantee of unbound work items
    and add a debug feature to flush out usages which depend on it.

  - Work around CPU -> NODE mapping becoming invalid on CPU offline.

    The branch is young but pushing out early as stable kernels are
    being affected"

* 'for-4.5-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
  workqueue: handle NUMA_NO_NODE for unbound pool_workqueue lookup
  workqueue: implement "workqueue.debug_force_rr_cpu" debug feature
  workqueue: schedule WORK_CPU_UNBOUND work on wq_unbound_cpumask CPUs
  Revert "workqueue: make sure delayed work run in local cpu"
  workqueue: skip flush dependency checks for legacy workqueues
commit 9aece75c13
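The behavioural change called out in the pull message, that queueing without an explicit CPU no longer guarantees local execution, is the one most likely to affect existing callers. Below is a minimal sketch, not part of this merge, of how a caller that genuinely depends on running on the queueing CPU can keep that behaviour by pinning the item explicitly; the work item and function names are hypothetical.

#include <linux/smp.h>
#include <linux/workqueue.h>

/* hypothetical work item that must run on the CPU that queued it */
static void my_local_fn(struct work_struct *work)
{
	/* touches per-CPU state, so it relies on the explicit pinning below */
}
static DECLARE_WORK(my_local_work, my_local_fn);

static void kick_local_work(void)
{
	/*
	 * queue_work()/schedule_work() may now pick another CPU from
	 * wq_unbound_cpumask; queue_work_on() pins the item to a specific
	 * CPU instead, preserving the old implicit behaviour.
	 */
	queue_work_on(raw_smp_processor_id(), system_wq, &my_local_work);
}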
Documentation/kernel-parameters.txt
@@ -4235,6 +4235,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			The default value of this parameter is determined by
 			the config option CONFIG_WQ_POWER_EFFICIENT_DEFAULT.
 
+	workqueue.debug_force_rr_cpu
+			Workqueue used to implicitly guarantee that work
+			items queued without explicit CPU specified are put
+			on the local CPU. This guarantee is no longer true
+			and while local CPU is still preferred work items
+			may be put on foreign CPUs. This debug option
+			forces round-robin CPU selection to flush out
+			usages which depend on the now broken guarantee.
+			When enabled, memory and cache locality will be
+			impacted.
+
 	x2apic_phys	[X86-64,APIC] Use x2apic physical mode instead of
 			default x2apic cluster mode on platforms
 			supporting x2apic.
include/linux/workqueue.h
@@ -311,6 +311,7 @@ enum {
 	__WQ_DRAINING		= 1 << 16, /* internal: workqueue is draining */
 	__WQ_ORDERED		= 1 << 17, /* internal: workqueue is ordered */
+	__WQ_LEGACY		= 1 << 18, /* internal: create*_workqueue() */
 
 	WQ_MAX_ACTIVE		= 512,	  /* I like 512, better ideas? */
 	WQ_MAX_UNBOUND_PER_CPU	= 4,	  /* 4 * #cpus for unbound wq */
@@ -411,12 +412,12 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
 	alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)
 
 #define create_workqueue(name)						\
-	alloc_workqueue("%s", WQ_MEM_RECLAIM, 1, (name))
+	alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name))
 #define create_freezable_workqueue(name)				\
-	alloc_workqueue("%s", WQ_FREEZABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, \
-			1, (name))
+	alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND | \
+			WQ_MEM_RECLAIM, 1, (name))
 #define create_singlethread_workqueue(name)				\
-	alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, name)
+	alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name)
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
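The __WQ_LEGACY flag added above is set only by the create*_workqueue() wrappers, which is what lets the flush-dependency check later in this merge skip those historical users while still warning for explicit alloc_workqueue() callers. A hedged sketch contrasting the two allocation styles follows; the queue names are made up for illustration.

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/workqueue.h>

static struct workqueue_struct *legacy_wq, *modern_wq;

static int __init wq_example_init(void)
{
	/* wrapper expands to alloc_workqueue() with __WQ_LEGACY | WQ_MEM_RECLAIM,
	 * so it is exempted from the flush-dependency warning */
	legacy_wq = create_workqueue("example_legacy");
	if (!legacy_wq)
		return -ENOMEM;

	/* explicit allocation: no __WQ_LEGACY, so a worker of this wq flushing
	 * a !WQ_MEM_RECLAIM workqueue still triggers the warning */
	modern_wq = alloc_workqueue("example_modern", WQ_MEM_RECLAIM, 0);
	if (!modern_wq) {
		destroy_workqueue(legacy_wq);
		return -ENOMEM;
	}
	return 0;
}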
kernel/workqueue.c
@@ -301,7 +301,23 @@ static DEFINE_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */
 static LIST_HEAD(workqueues);		/* PR: list of all workqueues */
 static bool workqueue_freezing;		/* PL: have wqs started freezing? */
 
-static cpumask_var_t wq_unbound_cpumask; /* PL: low level cpumask for all unbound wqs */
+/* PL: allowable cpus for unbound wqs and work items */
+static cpumask_var_t wq_unbound_cpumask;
+
+/* CPU where unbound work was last round robin scheduled from this CPU */
+static DEFINE_PER_CPU(int, wq_rr_cpu_last);
+
+/*
+ * Local execution of unbound work items is no longer guaranteed. The
+ * following always forces round-robin CPU selection on unbound work items
+ * to uncover usages which depend on it.
+ */
+#ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
+static bool wq_debug_force_rr_cpu = true;
+#else
+static bool wq_debug_force_rr_cpu = false;
+#endif
+module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);
 
 /* the per-cpu worker pools */
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
@@ -570,6 +586,16 @@ static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
 						  int node)
 {
 	assert_rcu_or_wq_mutex_or_pool_mutex(wq);
+
+	/*
+	 * XXX: @node can be NUMA_NO_NODE if CPU goes offline while a
+	 * delayed item is pending. The plan is to keep CPU -> NODE
+	 * mapping valid and stable across CPU on/offlines. Once that
+	 * happens, this workaround can be removed.
+	 */
+	if (unlikely(node == NUMA_NO_NODE))
+		return wq->dfl_pwq;
+
 	return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
 }
 
@@ -1298,6 +1324,39 @@ static bool is_chained_work(struct workqueue_struct *wq)
 	return worker && worker->current_pwq->wq == wq;
 }
 
+/*
+ * When queueing an unbound work item to a wq, prefer local CPU if allowed
+ * by wq_unbound_cpumask. Otherwise, round robin among the allowed ones to
+ * avoid perturbing sensitive tasks.
+ */
+static int wq_select_unbound_cpu(int cpu)
+{
+	static bool printed_dbg_warning;
+	int new_cpu;
+
+	if (likely(!wq_debug_force_rr_cpu)) {
+		if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
+			return cpu;
+	} else if (!printed_dbg_warning) {
+		pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n");
+		printed_dbg_warning = true;
+	}
+
+	if (cpumask_empty(wq_unbound_cpumask))
+		return cpu;
+
+	new_cpu = __this_cpu_read(wq_rr_cpu_last);
+	new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
+	if (unlikely(new_cpu >= nr_cpu_ids)) {
+		new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
+		if (unlikely(new_cpu >= nr_cpu_ids))
+			return cpu;
+	}
+	__this_cpu_write(wq_rr_cpu_last, new_cpu);
+
+	return new_cpu;
+}
+
 static void __queue_work(int cpu, struct workqueue_struct *wq,
 			 struct work_struct *work)
 {
@@ -1323,7 +1382,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
 		return;
 retry:
 	if (req_cpu == WORK_CPU_UNBOUND)
-		cpu = raw_smp_processor_id();
+		cpu = wq_select_unbound_cpu(raw_smp_processor_id());
 
 	/* pwq which will be used unless @work is executing elsewhere */
 	if (!(wq->flags & WQ_UNBOUND))
@@ -1464,13 +1523,13 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
 	timer_stats_timer_set_start_info(&dwork->timer);
 
 	dwork->wq = wq;
-	/* timer isn't guaranteed to run in this cpu, record earlier */
-	if (cpu == WORK_CPU_UNBOUND)
-		cpu = raw_smp_processor_id();
 	dwork->cpu = cpu;
 	timer->expires = jiffies + delay;
 
-	add_timer_on(timer, cpu);
+	if (unlikely(cpu != WORK_CPU_UNBOUND))
+		add_timer_on(timer, cpu);
+	else
+		add_timer(timer);
 }
 
 /**
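The hunk above restores the earlier behaviour: a delayed work item queued with WORK_CPU_UNBOUND now arms a plain add_timer(), so the timer, and therefore the work item, may fire on any CPU. A minimal sketch, with purely illustrative names, of how a caller that still wants its delayed work tied to a particular CPU can request that explicitly.

#include <linux/jiffies.h>
#include <linux/workqueue.h>

/* hypothetical delayed work handler */
static void my_delayed_fn(struct work_struct *work)
{
	/* runs on the CPU passed to queue_delayed_work_on() below */
}
static DECLARE_DELAYED_WORK(my_dwork, my_delayed_fn);

static void arm_delayed_on_cpu(int cpu)
{
	/* pin both the timer and the resulting work item to @cpu instead of
	 * relying on the implicit local-CPU behaviour removed by this merge */
	queue_delayed_work_on(cpu, system_wq, &my_dwork, msecs_to_jiffies(100));
}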
@@ -2355,7 +2414,8 @@ static void check_flush_dependency(struct workqueue_struct *target_wq,
 	WARN_ONCE(current->flags & PF_MEMALLOC,
 		  "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
 		  current->pid, current->comm, target_wq->name, target_func);
-	WARN_ONCE(worker && (worker->current_pwq->wq->flags & WQ_MEM_RECLAIM),
+	WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
+			      (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
 		  "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
 		  worker->current_pwq->wq->name, worker->current_func,
 		  target_wq->name, target_func);
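To make the adjusted condition concrete: the warning fires when a work item running on a WQ_MEM_RECLAIM workqueue flushes a workqueue without WQ_MEM_RECLAIM, which can stall under memory pressure; queues carrying __WQ_LEGACY are now exempted because the create_workqueue() wrappers set WQ_MEM_RECLAIM whether or not the user opted into it. A hedged sketch of the pattern being caught, with hypothetical names throughout.

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/workqueue.h>

static struct workqueue_struct *reclaim_wq;	/* allocated with WQ_MEM_RECLAIM */
static struct workqueue_struct *plain_wq;	/* no WQ_MEM_RECLAIM */

static void reclaim_work_fn(struct work_struct *work)
{
	/*
	 * A WQ_MEM_RECLAIM worker flushing a !WQ_MEM_RECLAIM workqueue is
	 * what the WARN_ONCE() above reports, unless the flushing wq also
	 * carries __WQ_LEGACY.
	 */
	flush_workqueue(plain_wq);
}
static DECLARE_WORK(reclaim_work, reclaim_work_fn);

static int __init flush_dep_example_init(void)
{
	reclaim_wq = alloc_workqueue("example_reclaim", WQ_MEM_RECLAIM, 0);
	if (!reclaim_wq)
		return -ENOMEM;
	plain_wq = alloc_workqueue("example_plain", 0, 0);
	if (!plain_wq) {
		destroy_workqueue(reclaim_wq);
		return -ENOMEM;
	}

	queue_work(reclaim_wq, &reclaim_work);
	return 0;
}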
lib/Kconfig.debug
@@ -1400,6 +1400,21 @@ config RCU_EQS_DEBUG
 
 endmenu # "RCU Debugging"
 
+config DEBUG_WQ_FORCE_RR_CPU
+	bool "Force round-robin CPU selection for unbound work items"
+	depends on DEBUG_KERNEL
+	default n
+	help
+	  Workqueue used to implicitly guarantee that work items queued
+	  without explicit CPU specified are put on the local CPU. This
+	  guarantee is no longer true and while local CPU is still
+	  preferred work items may be put on foreign CPUs. Kernel
+	  parameter "workqueue.debug_force_rr_cpu" is added to force
+	  round-robin CPU selection to flush out usages which depend on the
+	  now broken guarantee. This config option enables the debug
+	  feature by default. When enabled, memory and cache locality will
+	  be impacted.
+
 config DEBUG_BLOCK_EXT_DEVT
 	bool "Force extended block device numbers and spread them"
 	depends on DEBUG_KERNEL