io_uring: don't vmalloc rsrc tags

We don't really need vmalloc for keeping tags, it's not a hot path and
is there out of convenience, so replace it with two level tables to not
litter kernel virtual memory mappings.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/241a3422747113a8909e7e1030eb585d4a349e0d.1623634181.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Pavel Begunkov 2021-06-14 02:36:21 +01:00 committed by Jens Axboe
parent 9123c8ffce
commit 2d091d62b1

View File

@ -100,6 +100,10 @@
#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \
IORING_REGISTER_LAST + IORING_OP_LAST)
#define IO_RSRC_TAG_TABLE_SHIFT 9
#define IO_RSRC_TAG_TABLE_MAX (1U << IO_RSRC_TAG_TABLE_SHIFT)
#define IO_RSRC_TAG_TABLE_MASK (IO_RSRC_TAG_TABLE_MAX - 1)
#define IORING_MAX_REG_BUFFERS (1U << 14)
#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \
@ -243,7 +247,8 @@ typedef void (rsrc_put_fn)(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc);
struct io_rsrc_data {
struct io_ring_ctx *ctx;
u64 *tags;
u64 **tags;
unsigned int nr;
rsrc_put_fn *do_put;
atomic_t refs;
struct completion done;
@ -7177,9 +7182,20 @@ static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ct
return ret;
}
static u64 *io_get_tag_slot(struct io_rsrc_data *data, unsigned int idx)
{
unsigned int off = idx & IO_RSRC_TAG_TABLE_MASK;
unsigned int table_idx = idx >> IO_RSRC_TAG_TABLE_SHIFT;
return &data->tags[table_idx][off];
}
static void io_rsrc_data_free(struct io_rsrc_data *data)
{
kvfree(data->tags);
size_t size = data->nr * sizeof(data->tags[0][0]);
if (data->tags)
io_free_page_table((void **)data->tags, size);
kfree(data);
}
@ -7188,33 +7204,37 @@ static int io_rsrc_data_alloc(struct io_ring_ctx *ctx, rsrc_put_fn *do_put,
struct io_rsrc_data **pdata)
{
struct io_rsrc_data *data;
int ret = -ENOMEM;
unsigned i;
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
data->tags = kvcalloc(nr, sizeof(*data->tags), GFP_KERNEL);
data->tags = (u64 **)io_alloc_page_table(nr * sizeof(data->tags[0][0]));
if (!data->tags) {
kfree(data);
return -ENOMEM;
}
data->nr = nr;
data->ctx = ctx;
data->do_put = do_put;
if (utags) {
ret = -EFAULT;
for (i = 0; i < nr; i++) {
if (copy_from_user(&data->tags[i], &utags[i],
sizeof(data->tags[i]))) {
io_rsrc_data_free(data);
return -EFAULT;
}
if (copy_from_user(io_get_tag_slot(data, i), &utags[i],
sizeof(data->tags[i])))
goto fail;
}
}
atomic_set(&data->refs, 1);
data->ctx = ctx;
data->do_put = do_put;
init_completion(&data->done);
*pdata = data;
return 0;
fail:
io_rsrc_data_free(data);
return ret;
}
static bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files)
@ -7683,7 +7703,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
/* allow sparse sets */
if (fd == -1) {
ret = -EINVAL;
if (unlikely(ctx->file_data->tags[i]))
if (unlikely(*io_get_tag_slot(ctx->file_data, i)))
goto out_fput;
continue;
}
@ -7781,7 +7801,7 @@ static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
if (!prsrc)
return -ENOMEM;
prsrc->tag = data->tags[idx];
prsrc->tag = *io_get_tag_slot(data, idx);
prsrc->rsrc = rsrc;
list_add(&prsrc->list, &node->rsrc_list);
return 0;
@ -7851,7 +7871,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
err = -EBADF;
break;
}
data->tags[up->offset + done] = tag;
*io_get_tag_slot(data, up->offset + done) = tag;
io_fixed_file_set(file_slot, file);
err = io_sqe_file_register(ctx, file, i);
if (err) {
@ -8437,7 +8457,7 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
ret = io_buffer_validate(&iov);
if (ret)
break;
if (!iov.iov_base && data->tags[i]) {
if (!iov.iov_base && *io_get_tag_slot(data, i)) {
ret = -EINVAL;
break;
}
@ -8510,7 +8530,7 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
}
ctx->user_bufs[i] = imu;
ctx->buf_data->tags[offset] = tag;
*io_get_tag_slot(ctx->buf_data, offset) = tag;
}
if (needs_switch)