forked from Minki/linux
workqueue: map an unbound workqueue to multiple per-node pool_workqueues
Currently, an unbound workqueue has only one "current" pool_workqueue associated with it. It may have multiple pool_workqueues but only the first pool_workqueue services new work items. For NUMA affinity, we want to change this so that there are multiple current pool_workqueues serving different NUMA nodes. Introduce workqueue->numa_pwq_tbl[] which is indexed by NUMA node and points to the pool_workqueue to use for each possible node. This replaces first_pwq() in __queue_work() and workqueue_congested(). numa_pwq_tbl[] is currently initialized to point to the same pool_workqueue as first_pwq() so this patch doesn't make any behavior changes. v2: Use rcu_dereference_raw() in unbound_pwq_by_node() as the function may be called only with wq->mutex held. Signed-off-by: Tejun Heo <tj@kernel.org> Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
This commit is contained in:
parent
2728fd2f09
commit
df2d5ae499
@ -257,6 +257,7 @@ struct workqueue_struct {
|
|||||||
/* hot fields used during command issue, aligned to cacheline */
|
/* hot fields used during command issue, aligned to cacheline */
|
||||||
unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */
|
unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */
|
||||||
struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */
|
struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */
|
||||||
|
struct pool_workqueue __rcu *numa_pwq_tbl[]; /* FR: unbound pwqs indexed by node */
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct kmem_cache *pwq_cache;
|
static struct kmem_cache *pwq_cache;
|
||||||
@ -525,6 +526,22 @@ static struct pool_workqueue *first_pwq(struct workqueue_struct *wq)
|
|||||||
pwqs_node);
|
pwqs_node);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* unbound_pwq_by_node - return the unbound pool_workqueue for the given node
|
||||||
|
* @wq: the target workqueue
|
||||||
|
* @node: the node ID
|
||||||
|
*
|
||||||
|
* This must be called either with wq->mutex held or sched RCU read locked.
|
||||||
|
* If the pwq needs to be used beyond the locking in effect, the caller is
|
||||||
|
* responsible for guaranteeing that the pwq stays online.
|
||||||
|
*/
|
||||||
|
static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
|
||||||
|
int node)
|
||||||
|
{
|
||||||
|
assert_rcu_or_wq_mutex(wq);
|
||||||
|
return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
|
||||||
|
}
|
||||||
|
|
||||||
static unsigned int work_color_to_flags(int color)
|
static unsigned int work_color_to_flags(int color)
|
||||||
{
|
{
|
||||||
return color << WORK_STRUCT_COLOR_SHIFT;
|
return color << WORK_STRUCT_COLOR_SHIFT;
|
||||||
@ -1278,14 +1295,14 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
|
|||||||
WARN_ON_ONCE(!is_chained_work(wq)))
|
WARN_ON_ONCE(!is_chained_work(wq)))
|
||||||
return;
|
return;
|
||||||
retry:
|
retry:
|
||||||
|
if (req_cpu == WORK_CPU_UNBOUND)
|
||||||
|
cpu = raw_smp_processor_id();
|
||||||
|
|
||||||
/* pwq which will be used unless @work is executing elsewhere */
|
/* pwq which will be used unless @work is executing elsewhere */
|
||||||
if (!(wq->flags & WQ_UNBOUND)) {
|
if (!(wq->flags & WQ_UNBOUND))
|
||||||
if (cpu == WORK_CPU_UNBOUND)
|
|
||||||
cpu = raw_smp_processor_id();
|
|
||||||
pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
|
pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
|
||||||
} else {
|
else
|
||||||
pwq = first_pwq(wq);
|
pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If @work was previously on a different pool, it might still be
|
* If @work was previously on a different pool, it might still be
|
||||||
@ -1315,8 +1332,8 @@ retry:
|
|||||||
* pwq is determined and locked. For unbound pools, we could have
|
* pwq is determined and locked. For unbound pools, we could have
|
||||||
* raced with pwq release and it could already be dead. If its
|
* raced with pwq release and it could already be dead. If its
|
||||||
* refcnt is zero, repeat pwq selection. Note that pwqs never die
|
* refcnt is zero, repeat pwq selection. Note that pwqs never die
|
||||||
* without another pwq replacing it as the first pwq or while a
|
* without another pwq replacing it in the numa_pwq_tbl or while
|
||||||
* work item is executing on it, so the retying is guaranteed to
|
* work items are executing on it, so the retrying is guaranteed to
|
||||||
* make forward-progress.
|
* make forward-progress.
|
||||||
*/
|
*/
|
||||||
if (unlikely(!pwq->refcnt)) {
|
if (unlikely(!pwq->refcnt)) {
|
||||||
@ -3614,6 +3631,8 @@ static void init_and_link_pwq(struct pool_workqueue *pwq,
|
|||||||
struct worker_pool *pool,
|
struct worker_pool *pool,
|
||||||
struct pool_workqueue **p_last_pwq)
|
struct pool_workqueue **p_last_pwq)
|
||||||
{
|
{
|
||||||
|
int node;
|
||||||
|
|
||||||
BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
|
BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
|
||||||
|
|
||||||
pwq->pool = pool;
|
pwq->pool = pool;
|
||||||
@ -3640,8 +3659,11 @@ static void init_and_link_pwq(struct pool_workqueue *pwq,
|
|||||||
/* link in @pwq */
|
/* link in @pwq */
|
||||||
list_add_rcu(&pwq->pwqs_node, &wq->pwqs);
|
list_add_rcu(&pwq->pwqs_node, &wq->pwqs);
|
||||||
|
|
||||||
if (wq->flags & WQ_UNBOUND)
|
if (wq->flags & WQ_UNBOUND) {
|
||||||
copy_workqueue_attrs(wq->unbound_attrs, pool->attrs);
|
copy_workqueue_attrs(wq->unbound_attrs, pool->attrs);
|
||||||
|
for_each_node(node)
|
||||||
|
rcu_assign_pointer(wq->numa_pwq_tbl[node], pwq);
|
||||||
|
}
|
||||||
|
|
||||||
mutex_unlock(&wq->mutex);
|
mutex_unlock(&wq->mutex);
|
||||||
}
|
}
|
||||||
@ -3761,12 +3783,16 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
|
|||||||
struct lock_class_key *key,
|
struct lock_class_key *key,
|
||||||
const char *lock_name, ...)
|
const char *lock_name, ...)
|
||||||
{
|
{
|
||||||
|
size_t tbl_size = 0;
|
||||||
va_list args;
|
va_list args;
|
||||||
struct workqueue_struct *wq;
|
struct workqueue_struct *wq;
|
||||||
struct pool_workqueue *pwq;
|
struct pool_workqueue *pwq;
|
||||||
|
|
||||||
/* allocate wq and format name */
|
/* allocate wq and format name */
|
||||||
wq = kzalloc(sizeof(*wq), GFP_KERNEL);
|
if (flags & WQ_UNBOUND)
|
||||||
|
tbl_size = wq_numa_tbl_len * sizeof(wq->numa_pwq_tbl[0]);
|
||||||
|
|
||||||
|
wq = kzalloc(sizeof(*wq) + tbl_size, GFP_KERNEL);
|
||||||
if (!wq)
|
if (!wq)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
@ -3994,7 +4020,7 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
|
|||||||
if (!(wq->flags & WQ_UNBOUND))
|
if (!(wq->flags & WQ_UNBOUND))
|
||||||
pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
|
pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
|
||||||
else
|
else
|
||||||
pwq = first_pwq(wq);
|
pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
|
||||||
|
|
||||||
ret = !list_empty(&pwq->delayed_works);
|
ret = !list_empty(&pwq->delayed_works);
|
||||||
rcu_read_unlock_sched();
|
rcu_read_unlock_sched();
|
||||||
|
Loading…
Reference in New Issue
Block a user