io_uring/rsrc: don't offload node free

struct delayed_work rsrc_put_work was previously used to offload node
freeing because io_rsrc_node_ref_zero() was previously called by RCU in
the IRQ context. Now, as percpu refcounting is gone, we can do it
eagerly at the spot without pushing it to a worker.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/13fb1aac1e8d068ad8fd4a0c6d0d157ab61b90c0.1680576071.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Pavel Begunkov 2023-04-04 13:39:53 +01:00 committed by Jens Axboe
parent ff7c75ecaa
commit 36b9818a5a
3 changed files with 4 additions and 64 deletions

View File

@ -330,9 +330,6 @@ struct io_ring_ctx {
struct io_rsrc_data *file_data;
struct io_rsrc_data *buf_data;
struct delayed_work rsrc_put_work;
struct callback_head rsrc_put_tw;
struct llist_head rsrc_put_llist;
/* protected by ->uring_lock */
struct list_head rsrc_ref_list;

View File

@ -326,9 +326,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
INIT_LIST_HEAD(&ctx->timeout_list);
INIT_LIST_HEAD(&ctx->ltimeout_list);
INIT_LIST_HEAD(&ctx->rsrc_ref_list);
INIT_DELAYED_WORK(&ctx->rsrc_put_work, io_rsrc_put_work);
init_task_work(&ctx->rsrc_put_tw, io_rsrc_put_tw);
init_llist_head(&ctx->rsrc_put_llist);
init_llist_head(&ctx->work_llist);
INIT_LIST_HEAD(&ctx->tctx_list);
ctx->submit_state.free_list.next = NULL;
@ -2821,11 +2818,8 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_rsrc_node_destroy(ctx->rsrc_node);
if (ctx->rsrc_backup_node)
io_rsrc_node_destroy(ctx->rsrc_backup_node);
flush_delayed_work(&ctx->rsrc_put_work);
flush_delayed_work(&ctx->fallback_work);
WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list));
WARN_ON_ONCE(!llist_empty(&ctx->rsrc_put_llist));
#if defined(CONFIG_UNIX)
if (ctx->ring_sock) {

View File

@ -145,15 +145,8 @@ static void io_rsrc_put_work_one(struct io_rsrc_data *rsrc_data,
{
struct io_ring_ctx *ctx = rsrc_data->ctx;
if (prsrc->tag) {
if (ctx->flags & IORING_SETUP_IOPOLL) {
mutex_lock(&ctx->uring_lock);
io_post_aux_cqe(ctx, prsrc->tag, 0, 0);
mutex_unlock(&ctx->uring_lock);
} else {
io_post_aux_cqe(ctx, prsrc->tag, 0, 0);
}
}
if (prsrc->tag)
io_post_aux_cqe(ctx, prsrc->tag, 0, 0);
rsrc_data->do_put(ctx, prsrc);
}
@ -176,32 +169,6 @@ static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
complete(&rsrc_data->done);
}
void io_rsrc_put_work(struct work_struct *work)
{
struct io_ring_ctx *ctx;
struct llist_node *node;
ctx = container_of(work, struct io_ring_ctx, rsrc_put_work.work);
node = llist_del_all(&ctx->rsrc_put_llist);
while (node) {
struct io_rsrc_node *ref_node;
struct llist_node *next = node->next;
ref_node = llist_entry(node, struct io_rsrc_node, llist);
__io_rsrc_put_work(ref_node);
node = next;
}
}
void io_rsrc_put_tw(struct callback_head *cb)
{
struct io_ring_ctx *ctx = container_of(cb, struct io_ring_ctx,
rsrc_put_tw);
io_rsrc_put_work(&ctx->rsrc_put_work.work);
}
void io_wait_rsrc_data(struct io_rsrc_data *data)
{
if (data && !atomic_dec_and_test(&data->refs))
@ -217,34 +184,18 @@ void io_rsrc_node_ref_zero(struct io_rsrc_node *node)
__must_hold(&node->rsrc_data->ctx->uring_lock)
{
struct io_ring_ctx *ctx = node->rsrc_data->ctx;
bool first_add = false;
unsigned long delay = HZ;
node->done = true;
/* if we are mid-quiesce then do not delay */
if (node->rsrc_data->quiesce)
delay = 0;
while (!list_empty(&ctx->rsrc_ref_list)) {
node = list_first_entry(&ctx->rsrc_ref_list,
struct io_rsrc_node, node);
/* recycle ref nodes in order */
if (!node->done)
break;
list_del(&node->node);
first_add |= llist_add(&node->llist, &ctx->rsrc_put_llist);
__io_rsrc_put_work(node);
}
if (!first_add)
return;
if (ctx->submitter_task) {
if (!task_work_add(ctx->submitter_task, &ctx->rsrc_put_tw,
ctx->notify_method))
return;
}
mod_delayed_work(system_wq, &ctx->rsrc_put_work, delay);
}
static struct io_rsrc_node *io_rsrc_node_alloc(void)
@ -320,13 +271,11 @@ __cold static int io_rsrc_ref_quiesce(struct io_rsrc_data *data,
if (ret < 0) {
atomic_inc(&data->refs);
/* wait for all works potentially completing data->done */
flush_delayed_work(&ctx->rsrc_put_work);
reinit_completion(&data->done);
mutex_lock(&ctx->uring_lock);
break;
}
flush_delayed_work(&ctx->rsrc_put_work);
ret = wait_for_completion_interruptible(&data->done);
if (!ret) {
mutex_lock(&ctx->uring_lock);