mirror of
https://github.com/torvalds/linux.git
synced 2024-11-10 14:11:52 +00:00
for-6.5/io_uring-2023-06-23
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmSV8cEQHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgpnvZD/0QWstFCe1CSLWaycdC9fhWftFt3hyEIST5 CYEL56UZrDWNkv9xTLe855xvMavjd4sdHlUa8NUPghRQeJyKYgRxHBLXRWmy0uNN l47Zjiwsolmbr3Nt6qViLdCDYmG39ZGNwWo8b6p3ybWYLtzxeOblocOBTPzoCtkS hjo7Z0eMONvsvLX+l0o9IDdWtZIQ2fGo4VYkMIVb6CyxRPpuUuPKbE25qaTx+uBg Fy6Qa3SlaTwzqcg3dttggjIP792L/eETUCWGndg5pJNbrkj/fI4Vm4bEljID76eS HODl+pWHmyM6avVkypr7N3Tp5HKF0OTUa4vJTLIZo1QiRu6zlXphtuvGn+McEmgV hbYmQMYWzqJ22k2iEpCR58pdhmZJC9uB8r4Rwgr/t9GKqt4E+15EzmqkG9cUVMGV rfbBwVLwBUd5+0WHwQ8RzdtaUPt17vSIW/8WhU5zoMGVotqVBHO/H+5BtmKPWWpq fx1etQ8XJVPIxziJvgsEitb1s6KZzJspcONDlLEitmZkflv3gGdVm99KNbXwJpcp m6+FcYQ5d5FivfLPGgpx8go+4M2QuoW2yRGwZHu54buCnpxgNjIk898OjrUrdXCg 3/0m99GXmOWQQl0VrrTr+Fv99nVsQ2hMQzOFJGMYRtHEEc5xiTcJiZmoxmF7T7/C TipyW3czsw== =5Me8 -----END PGP SIGNATURE----- Merge tag 'for-6.5/io_uring-2023-06-23' of git://git.kernel.dk/linux Pull io_uring updates from Jens Axboe: "Nothing major in this release, just a bunch of cleanups and some optimizations around networking mostly. - clean up file request flags handling (Christoph) - clean up request freeing and CQ locking (Pavel) - support for using pre-registering the io_uring fd at setup time (Josh) - Add support for user allocated ring memory, rather than having the kernel allocate it. 
Mostly for packing rings into a huge page (me) - avoid an unnecessary double retry on receive (me) - maintain ordering for task_work, which also improves performance (me) - misc cleanups/fixes (Pavel, me)" * tag 'for-6.5/io_uring-2023-06-23' of git://git.kernel.dk/linux: (39 commits) io_uring: merge conditional unlock flush helpers io_uring: make io_cq_unlock_post static io_uring: inline __io_cq_unlock io_uring: fix acquire/release annotations io_uring: kill io_cq_unlock() io_uring: remove IOU_F_TWQ_FORCE_NORMAL io_uring: don't batch task put on reqs free io_uring: move io_clean_op() io_uring: inline io_dismantle_req() io_uring: remove io_free_req_tw io_uring: open code io_put_req_find_next io_uring: add helpers to decode the fixed file file_ptr io_uring: use io_file_from_index in io_msg_grab_file io_uring: use io_file_from_index in __io_sync_cancel io_uring: return REQ_F_ flags from io_file_get_flags io_uring: remove io_req_ffs_set io_uring: remove a confusing comment above io_file_get_flags io_uring: remove the mode variable in io_file_get_flags io_uring: remove __io_file_supports_nowait io_uring: wait interruptibly for request completions on exit ...
This commit is contained in:
commit
0aa69d53ac
@ -481,7 +481,7 @@ static int blkdev_open(struct inode *inode, struct file *filp)
|
||||
* during an unstable branch.
|
||||
*/
|
||||
filp->f_flags |= O_LARGEFILE;
|
||||
filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
|
||||
filp->f_mode |= FMODE_BUF_RASYNC;
|
||||
|
||||
if (filp->f_flags & O_NDELAY)
|
||||
filp->f_mode |= FMODE_NDELAY;
|
||||
@ -494,6 +494,9 @@ static int blkdev_open(struct inode *inode, struct file *filp)
|
||||
if (IS_ERR(bdev))
|
||||
return PTR_ERR(bdev);
|
||||
|
||||
if (bdev_nowait(bdev))
|
||||
filp->f_mode |= FMODE_NOWAIT;
|
||||
|
||||
filp->private_data = bdev;
|
||||
filp->f_mapping = bdev->bd_inode->i_mapping;
|
||||
filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
|
||||
|
@ -521,7 +521,7 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
|
||||
if (cookie != NULL && blk_rq_is_poll(req))
|
||||
nvme_uring_task_cb(ioucmd, IO_URING_F_UNLOCKED);
|
||||
else
|
||||
io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_cb);
|
||||
io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
|
||||
|
||||
return RQ_END_IO_FREE;
|
||||
}
|
||||
@ -543,7 +543,7 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req,
|
||||
if (cookie != NULL && blk_rq_is_poll(req))
|
||||
nvme_uring_task_meta_cb(ioucmd, IO_URING_F_UNLOCKED);
|
||||
else
|
||||
io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_meta_cb);
|
||||
io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_meta_cb);
|
||||
|
||||
return RQ_END_IO_NONE;
|
||||
}
|
||||
|
@ -46,13 +46,23 @@ int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
|
||||
struct iov_iter *iter, void *ioucmd);
|
||||
void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2,
|
||||
unsigned issue_flags);
|
||||
void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned));
|
||||
struct sock *io_uring_get_socket(struct file *file);
|
||||
void __io_uring_cancel(bool cancel_all);
|
||||
void __io_uring_free(struct task_struct *tsk);
|
||||
void io_uring_unreg_ringfd(void);
|
||||
const char *io_uring_get_opcode(u8 opcode);
|
||||
void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned),
|
||||
unsigned flags);
|
||||
/* users should follow semantics of IOU_F_TWQ_LAZY_WAKE */
|
||||
void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned));
|
||||
|
||||
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
|
||||
{
|
||||
__io_uring_cmd_do_in_task(ioucmd, task_work_cb, 0);
|
||||
}
|
||||
|
||||
static inline void io_uring_files_cancel(void)
|
||||
{
|
||||
@ -85,6 +95,10 @@ static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
|
||||
{
|
||||
}
|
||||
static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
|
||||
{
|
||||
}
|
||||
static inline struct sock *io_uring_get_socket(struct file *file)
|
||||
{
|
||||
return NULL;
|
||||
|
@ -211,6 +211,16 @@ struct io_ring_ctx {
|
||||
unsigned int compat: 1;
|
||||
|
||||
enum task_work_notify_mode notify_method;
|
||||
|
||||
/*
|
||||
* If IORING_SETUP_NO_MMAP is used, then the below holds
|
||||
* the gup'ed pages for the two rings, and the sqes.
|
||||
*/
|
||||
unsigned short n_ring_pages;
|
||||
unsigned short n_sqe_pages;
|
||||
struct page **ring_pages;
|
||||
struct page **sqe_pages;
|
||||
|
||||
struct io_rings *rings;
|
||||
struct task_struct *submitter_task;
|
||||
struct percpu_ref refs;
|
||||
|
@ -173,6 +173,18 @@ enum {
|
||||
*/
|
||||
#define IORING_SETUP_DEFER_TASKRUN (1U << 13)
|
||||
|
||||
/*
|
||||
* Application provides the memory for the rings
|
||||
*/
|
||||
#define IORING_SETUP_NO_MMAP (1U << 14)
|
||||
|
||||
/*
|
||||
* Register the ring fd in itself for use with
|
||||
* IORING_REGISTER_USE_REGISTERED_RING; return a registered fd index rather
|
||||
* than an fd.
|
||||
*/
|
||||
#define IORING_SETUP_REGISTERED_FD_ONLY (1U << 15)
|
||||
|
||||
enum io_uring_op {
|
||||
IORING_OP_NOP,
|
||||
IORING_OP_READV,
|
||||
@ -406,7 +418,7 @@ struct io_sqring_offsets {
|
||||
__u32 dropped;
|
||||
__u32 array;
|
||||
__u32 resv1;
|
||||
__u64 resv2;
|
||||
__u64 user_addr;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -425,7 +437,7 @@ struct io_cqring_offsets {
|
||||
__u32 cqes;
|
||||
__u32 flags;
|
||||
__u32 resv1;
|
||||
__u64 resv2;
|
||||
__u64 user_addr;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -216,13 +216,10 @@ static int __io_sync_cancel(struct io_uring_task *tctx,
|
||||
/* fixed must be grabbed every time since we drop the uring_lock */
|
||||
if ((cd->flags & IORING_ASYNC_CANCEL_FD) &&
|
||||
(cd->flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
|
||||
unsigned long file_ptr;
|
||||
|
||||
if (unlikely(fd >= ctx->nr_user_files))
|
||||
return -EBADF;
|
||||
fd = array_index_nospec(fd, ctx->nr_user_files);
|
||||
file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr;
|
||||
cd->file = (struct file *) (file_ptr & FFS_MASK);
|
||||
cd->file = io_file_from_index(&ctx->file_table, fd);
|
||||
if (!cd->file)
|
||||
return -EBADF;
|
||||
}
|
||||
|
@ -78,10 +78,8 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
|
||||
file_slot = io_fixed_file_slot(&ctx->file_table, slot_index);
|
||||
|
||||
if (file_slot->file_ptr) {
|
||||
struct file *old_file;
|
||||
|
||||
old_file = (struct file *)(file_slot->file_ptr & FFS_MASK);
|
||||
ret = io_queue_rsrc_removal(ctx->file_data, slot_index, old_file);
|
||||
ret = io_queue_rsrc_removal(ctx->file_data, slot_index,
|
||||
io_slot_file(file_slot));
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -140,7 +138,6 @@ int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
|
||||
int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset)
|
||||
{
|
||||
struct io_fixed_file *file_slot;
|
||||
struct file *file;
|
||||
int ret;
|
||||
|
||||
if (unlikely(!ctx->file_data))
|
||||
@ -153,8 +150,8 @@ int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset)
|
||||
if (!file_slot->file_ptr)
|
||||
return -EBADF;
|
||||
|
||||
file = (struct file *)(file_slot->file_ptr & FFS_MASK);
|
||||
ret = io_queue_rsrc_removal(ctx->file_data, offset, file);
|
||||
ret = io_queue_rsrc_removal(ctx->file_data, offset,
|
||||
io_slot_file(file_slot));
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
@ -5,10 +5,6 @@
|
||||
#include <linux/file.h>
|
||||
#include <linux/io_uring_types.h>
|
||||
|
||||
#define FFS_NOWAIT 0x1UL
|
||||
#define FFS_ISREG 0x2UL
|
||||
#define FFS_MASK ~(FFS_NOWAIT|FFS_ISREG)
|
||||
|
||||
bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files);
|
||||
void io_free_file_tables(struct io_file_table *table);
|
||||
|
||||
@ -43,21 +39,31 @@ io_fixed_file_slot(struct io_file_table *table, unsigned i)
|
||||
return &table->files[i];
|
||||
}
|
||||
|
||||
#define FFS_NOWAIT 0x1UL
|
||||
#define FFS_ISREG 0x2UL
|
||||
#define FFS_MASK ~(FFS_NOWAIT|FFS_ISREG)
|
||||
|
||||
static inline unsigned int io_slot_flags(struct io_fixed_file *slot)
|
||||
{
|
||||
return (slot->file_ptr & ~FFS_MASK) << REQ_F_SUPPORT_NOWAIT_BIT;
|
||||
}
|
||||
|
||||
static inline struct file *io_slot_file(struct io_fixed_file *slot)
|
||||
{
|
||||
return (struct file *)(slot->file_ptr & FFS_MASK);
|
||||
}
|
||||
|
||||
static inline struct file *io_file_from_index(struct io_file_table *table,
|
||||
int index)
|
||||
{
|
||||
struct io_fixed_file *slot = io_fixed_file_slot(table, index);
|
||||
|
||||
return (struct file *) (slot->file_ptr & FFS_MASK);
|
||||
return io_slot_file(io_fixed_file_slot(table, index));
|
||||
}
|
||||
|
||||
static inline void io_fixed_file_set(struct io_fixed_file *file_slot,
|
||||
struct file *file)
|
||||
{
|
||||
unsigned long file_ptr = (unsigned long) file;
|
||||
|
||||
file_ptr |= io_file_get_flags(file);
|
||||
file_slot->file_ptr = file_ptr;
|
||||
file_slot->file_ptr = (unsigned long)file |
|
||||
(io_file_get_flags(file) >> REQ_F_SUPPORT_NOWAIT_BIT);
|
||||
}
|
||||
|
||||
static inline void io_reset_alloc_hint(struct io_ring_ctx *ctx)
|
||||
|
@ -95,6 +95,7 @@
|
||||
|
||||
#include "timeout.h"
|
||||
#include "poll.h"
|
||||
#include "rw.h"
|
||||
#include "alloc_cache.h"
|
||||
|
||||
#define IORING_MAX_ENTRIES 32768
|
||||
@ -145,8 +146,6 @@ static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
|
||||
struct task_struct *task,
|
||||
bool cancel_all);
|
||||
|
||||
static void io_dismantle_req(struct io_kiocb *req);
|
||||
static void io_clean_op(struct io_kiocb *req);
|
||||
static void io_queue_sqe(struct io_kiocb *req);
|
||||
static void io_move_task_work_from_local(struct io_ring_ctx *ctx);
|
||||
static void __io_submit_flush_completions(struct io_ring_ctx *ctx);
|
||||
@ -367,6 +366,39 @@ static bool req_need_defer(struct io_kiocb *req, u32 seq)
|
||||
return false;
|
||||
}
|
||||
|
||||
static void io_clean_op(struct io_kiocb *req)
|
||||
{
|
||||
if (req->flags & REQ_F_BUFFER_SELECTED) {
|
||||
spin_lock(&req->ctx->completion_lock);
|
||||
io_put_kbuf_comp(req);
|
||||
spin_unlock(&req->ctx->completion_lock);
|
||||
}
|
||||
|
||||
if (req->flags & REQ_F_NEED_CLEANUP) {
|
||||
const struct io_cold_def *def = &io_cold_defs[req->opcode];
|
||||
|
||||
if (def->cleanup)
|
||||
def->cleanup(req);
|
||||
}
|
||||
if ((req->flags & REQ_F_POLLED) && req->apoll) {
|
||||
kfree(req->apoll->double_poll);
|
||||
kfree(req->apoll);
|
||||
req->apoll = NULL;
|
||||
}
|
||||
if (req->flags & REQ_F_INFLIGHT) {
|
||||
struct io_uring_task *tctx = req->task->io_uring;
|
||||
|
||||
atomic_dec(&tctx->inflight_tracked);
|
||||
}
|
||||
if (req->flags & REQ_F_CREDS)
|
||||
put_cred(req->creds);
|
||||
if (req->flags & REQ_F_ASYNC_DATA) {
|
||||
kfree(req->async_data);
|
||||
req->async_data = NULL;
|
||||
}
|
||||
req->flags &= ~IO_REQ_CLEAN_FLAGS;
|
||||
}
|
||||
|
||||
static inline void io_req_track_inflight(struct io_kiocb *req)
|
||||
{
|
||||
if (!(req->flags & REQ_F_INFLIGHT)) {
|
||||
@ -423,8 +455,8 @@ static void io_prep_async_work(struct io_kiocb *req)
|
||||
if (req->flags & REQ_F_FORCE_ASYNC)
|
||||
req->work.flags |= IO_WQ_WORK_CONCURRENT;
|
||||
|
||||
if (req->file && !io_req_ffs_set(req))
|
||||
req->flags |= io_file_get_flags(req->file) << REQ_F_SUPPORT_NOWAIT_BIT;
|
||||
if (req->file && !(req->flags & REQ_F_FIXED_FILE))
|
||||
req->flags |= io_file_get_flags(req->file);
|
||||
|
||||
if (req->file && (req->flags & REQ_F_ISREG)) {
|
||||
bool should_hash = def->hash_reg_file;
|
||||
@ -594,42 +626,18 @@ void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
|
||||
}
|
||||
|
||||
static inline void __io_cq_lock(struct io_ring_ctx *ctx)
|
||||
__acquires(ctx->completion_lock)
|
||||
{
|
||||
if (!ctx->task_complete)
|
||||
spin_lock(&ctx->completion_lock);
|
||||
}
|
||||
|
||||
static inline void __io_cq_unlock(struct io_ring_ctx *ctx)
|
||||
{
|
||||
if (!ctx->task_complete)
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
}
|
||||
|
||||
static inline void io_cq_lock(struct io_ring_ctx *ctx)
|
||||
__acquires(ctx->completion_lock)
|
||||
{
|
||||
spin_lock(&ctx->completion_lock);
|
||||
}
|
||||
|
||||
static inline void io_cq_unlock(struct io_ring_ctx *ctx)
|
||||
__releases(ctx->completion_lock)
|
||||
{
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
}
|
||||
|
||||
/* keep it inlined for io_submit_flush_completions() */
|
||||
static inline void __io_cq_unlock_post(struct io_ring_ctx *ctx)
|
||||
__releases(ctx->completion_lock)
|
||||
{
|
||||
io_commit_cqring(ctx);
|
||||
__io_cq_unlock(ctx);
|
||||
io_commit_cqring_flush(ctx);
|
||||
io_cqring_wake(ctx);
|
||||
}
|
||||
|
||||
static void __io_cq_unlock_post_flush(struct io_ring_ctx *ctx)
|
||||
__releases(ctx->completion_lock)
|
||||
{
|
||||
io_commit_cqring(ctx);
|
||||
|
||||
@ -641,13 +649,13 @@ static void __io_cq_unlock_post_flush(struct io_ring_ctx *ctx)
|
||||
*/
|
||||
io_commit_cqring_flush(ctx);
|
||||
} else {
|
||||
__io_cq_unlock(ctx);
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
io_commit_cqring_flush(ctx);
|
||||
io_cqring_wake(ctx);
|
||||
}
|
||||
}
|
||||
|
||||
void io_cq_unlock_post(struct io_ring_ctx *ctx)
|
||||
static void io_cq_unlock_post(struct io_ring_ctx *ctx)
|
||||
__releases(ctx->completion_lock)
|
||||
{
|
||||
io_commit_cqring(ctx);
|
||||
@ -662,10 +670,10 @@ static void io_cqring_overflow_kill(struct io_ring_ctx *ctx)
|
||||
struct io_overflow_cqe *ocqe;
|
||||
LIST_HEAD(list);
|
||||
|
||||
io_cq_lock(ctx);
|
||||
spin_lock(&ctx->completion_lock);
|
||||
list_splice_init(&ctx->cq_overflow_list, &list);
|
||||
clear_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq);
|
||||
io_cq_unlock(ctx);
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
|
||||
while (!list_empty(&list)) {
|
||||
ocqe = list_first_entry(&list, struct io_overflow_cqe, list);
|
||||
@ -722,29 +730,29 @@ static void io_cqring_overflow_flush(struct io_ring_ctx *ctx)
|
||||
}
|
||||
|
||||
/* can be called by any task */
|
||||
static void io_put_task_remote(struct task_struct *task, int nr)
|
||||
static void io_put_task_remote(struct task_struct *task)
|
||||
{
|
||||
struct io_uring_task *tctx = task->io_uring;
|
||||
|
||||
percpu_counter_sub(&tctx->inflight, nr);
|
||||
percpu_counter_sub(&tctx->inflight, 1);
|
||||
if (unlikely(atomic_read(&tctx->in_cancel)))
|
||||
wake_up(&tctx->wait);
|
||||
put_task_struct_many(task, nr);
|
||||
put_task_struct(task);
|
||||
}
|
||||
|
||||
/* used by a task to put its own references */
|
||||
static void io_put_task_local(struct task_struct *task, int nr)
|
||||
static void io_put_task_local(struct task_struct *task)
|
||||
{
|
||||
task->io_uring->cached_refs += nr;
|
||||
task->io_uring->cached_refs++;
|
||||
}
|
||||
|
||||
/* must be called somewhat shortly after putting a request */
|
||||
static inline void io_put_task(struct task_struct *task, int nr)
|
||||
static inline void io_put_task(struct task_struct *task)
|
||||
{
|
||||
if (likely(task == current))
|
||||
io_put_task_local(task, nr);
|
||||
io_put_task_local(task);
|
||||
else
|
||||
io_put_task_remote(task, nr);
|
||||
io_put_task_remote(task);
|
||||
}
|
||||
|
||||
void io_task_refs_refill(struct io_uring_task *tctx)
|
||||
@ -934,20 +942,19 @@ bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags
|
||||
return __io_post_aux_cqe(ctx, user_data, res, cflags, true);
|
||||
}
|
||||
|
||||
bool io_aux_cqe(struct io_ring_ctx *ctx, bool defer, u64 user_data, s32 res, u32 cflags,
|
||||
bool io_aux_cqe(const struct io_kiocb *req, bool defer, s32 res, u32 cflags,
|
||||
bool allow_overflow)
|
||||
{
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
u64 user_data = req->cqe.user_data;
|
||||
struct io_uring_cqe *cqe;
|
||||
unsigned int length;
|
||||
|
||||
if (!defer)
|
||||
return __io_post_aux_cqe(ctx, user_data, res, cflags, allow_overflow);
|
||||
|
||||
length = ARRAY_SIZE(ctx->submit_state.cqes);
|
||||
|
||||
lockdep_assert_held(&ctx->uring_lock);
|
||||
|
||||
if (ctx->submit_state.cqes_count == length) {
|
||||
if (ctx->submit_state.cqes_count == ARRAY_SIZE(ctx->submit_state.cqes)) {
|
||||
__io_cq_lock(ctx);
|
||||
__io_flush_post_cqes(ctx);
|
||||
/* no need to flush - flush is deferred */
|
||||
@ -991,14 +998,18 @@ static void __io_req_complete_post(struct io_kiocb *req, unsigned issue_flags)
|
||||
}
|
||||
}
|
||||
io_put_kbuf_comp(req);
|
||||
io_dismantle_req(req);
|
||||
if (unlikely(req->flags & IO_REQ_CLEAN_FLAGS))
|
||||
io_clean_op(req);
|
||||
if (!(req->flags & REQ_F_FIXED_FILE))
|
||||
io_put_file(req->file);
|
||||
|
||||
rsrc_node = req->rsrc_node;
|
||||
/*
|
||||
* Selected buffer deallocation in io_clean_op() assumes that
|
||||
* we don't hold ->completion_lock. Clean them here to avoid
|
||||
* deadlocks.
|
||||
*/
|
||||
io_put_task_remote(req->task, 1);
|
||||
io_put_task_remote(req->task);
|
||||
wq_list_add_head(&req->comp_list, &ctx->locked_free_list);
|
||||
ctx->locked_free_nr++;
|
||||
}
|
||||
@ -1111,36 +1122,13 @@ __cold bool __io_alloc_req_refill(struct io_ring_ctx *ctx)
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void io_dismantle_req(struct io_kiocb *req)
|
||||
{
|
||||
unsigned int flags = req->flags;
|
||||
|
||||
if (unlikely(flags & IO_REQ_CLEAN_FLAGS))
|
||||
io_clean_op(req);
|
||||
if (!(flags & REQ_F_FIXED_FILE))
|
||||
io_put_file(req->file);
|
||||
}
|
||||
|
||||
static __cold void io_free_req_tw(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
{
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
|
||||
if (req->rsrc_node) {
|
||||
io_tw_lock(ctx, ts);
|
||||
io_put_rsrc_node(ctx, req->rsrc_node);
|
||||
}
|
||||
io_dismantle_req(req);
|
||||
io_put_task_remote(req->task, 1);
|
||||
|
||||
spin_lock(&ctx->completion_lock);
|
||||
wq_list_add_head(&req->comp_list, &ctx->locked_free_list);
|
||||
ctx->locked_free_nr++;
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
}
|
||||
|
||||
__cold void io_free_req(struct io_kiocb *req)
|
||||
{
|
||||
req->io_task_work.func = io_free_req_tw;
|
||||
/* refs were already put, restore them for io_req_task_complete() */
|
||||
req->flags &= ~REQ_F_REFCOUNT;
|
||||
/* we only want to free it, don't post CQEs */
|
||||
req->flags |= REQ_F_CQE_SKIP;
|
||||
req->io_task_work.func = io_req_task_complete;
|
||||
io_req_task_work_add(req);
|
||||
}
|
||||
|
||||
@ -1205,7 +1193,9 @@ static unsigned int handle_tw_list(struct llist_node *node,
|
||||
ts->locked = mutex_trylock(&(*ctx)->uring_lock);
|
||||
percpu_ref_get(&(*ctx)->refs);
|
||||
}
|
||||
req->io_task_work.func(req, ts);
|
||||
INDIRECT_CALL_2(req->io_task_work.func,
|
||||
io_poll_task_func, io_req_rw_complete,
|
||||
req, ts);
|
||||
node = next;
|
||||
count++;
|
||||
if (unlikely(need_resched())) {
|
||||
@ -1303,7 +1293,7 @@ static __cold void io_fallback_tw(struct io_uring_task *tctx)
|
||||
}
|
||||
}
|
||||
|
||||
static void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
|
||||
static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
|
||||
{
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
unsigned nr_wait, nr_tw, nr_tw_prev;
|
||||
@ -1354,19 +1344,11 @@ static void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
|
||||
wake_up_state(ctx->submitter_task, TASK_INTERRUPTIBLE);
|
||||
}
|
||||
|
||||
void __io_req_task_work_add(struct io_kiocb *req, unsigned flags)
|
||||
static void io_req_normal_work_add(struct io_kiocb *req)
|
||||
{
|
||||
struct io_uring_task *tctx = req->task->io_uring;
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
|
||||
if (!(flags & IOU_F_TWQ_FORCE_NORMAL) &&
|
||||
(ctx->flags & IORING_SETUP_DEFER_TASKRUN)) {
|
||||
rcu_read_lock();
|
||||
io_req_local_work_add(req, flags);
|
||||
rcu_read_unlock();
|
||||
return;
|
||||
}
|
||||
|
||||
/* task_work already pending, we're done */
|
||||
if (!llist_add(&req->io_task_work.node, &tctx->task_list))
|
||||
return;
|
||||
@ -1380,6 +1362,17 @@ void __io_req_task_work_add(struct io_kiocb *req, unsigned flags)
|
||||
io_fallback_tw(tctx);
|
||||
}
|
||||
|
||||
void __io_req_task_work_add(struct io_kiocb *req, unsigned flags)
|
||||
{
|
||||
if (req->ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
|
||||
rcu_read_lock();
|
||||
io_req_local_work_add(req, flags);
|
||||
rcu_read_unlock();
|
||||
} else {
|
||||
io_req_normal_work_add(req);
|
||||
}
|
||||
}
|
||||
|
||||
static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct llist_node *node;
|
||||
@ -1390,7 +1383,7 @@ static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx)
|
||||
io_task_work.node);
|
||||
|
||||
node = node->next;
|
||||
__io_req_task_work_add(req, IOU_F_TWQ_FORCE_NORMAL);
|
||||
io_req_normal_work_add(req);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1405,13 +1398,19 @@ static int __io_run_local_work(struct io_ring_ctx *ctx, struct io_tw_state *ts)
|
||||
if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
|
||||
atomic_andnot(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
|
||||
again:
|
||||
node = io_llist_xchg(&ctx->work_llist, NULL);
|
||||
/*
|
||||
* llists are in reverse order, flip it back the right way before
|
||||
* running the pending items.
|
||||
*/
|
||||
node = llist_reverse_order(io_llist_xchg(&ctx->work_llist, NULL));
|
||||
while (node) {
|
||||
struct llist_node *next = node->next;
|
||||
struct io_kiocb *req = container_of(node, struct io_kiocb,
|
||||
io_task_work.node);
|
||||
prefetch(container_of(next, struct io_kiocb, io_task_work.node));
|
||||
req->io_task_work.func(req, ts);
|
||||
INDIRECT_CALL_2(req->io_task_work.func,
|
||||
io_poll_task_func, io_req_rw_complete,
|
||||
req, ts);
|
||||
ret++;
|
||||
node = next;
|
||||
}
|
||||
@ -1498,9 +1497,6 @@ void io_queue_next(struct io_kiocb *req)
|
||||
void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node)
|
||||
__must_hold(&ctx->uring_lock)
|
||||
{
|
||||
struct task_struct *task = NULL;
|
||||
int task_refs = 0;
|
||||
|
||||
do {
|
||||
struct io_kiocb *req = container_of(node, struct io_kiocb,
|
||||
comp_list);
|
||||
@ -1530,19 +1526,10 @@ void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node)
|
||||
|
||||
io_req_put_rsrc_locked(req, ctx);
|
||||
|
||||
if (req->task != task) {
|
||||
if (task)
|
||||
io_put_task(task, task_refs);
|
||||
task = req->task;
|
||||
task_refs = 0;
|
||||
}
|
||||
task_refs++;
|
||||
io_put_task(req->task);
|
||||
node = req->comp_list.next;
|
||||
io_req_add_to_cache(req, ctx);
|
||||
} while (node);
|
||||
|
||||
if (task)
|
||||
io_put_task(task, task_refs);
|
||||
}
|
||||
|
||||
static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
|
||||
@ -1570,7 +1557,7 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
|
||||
}
|
||||
}
|
||||
}
|
||||
__io_cq_unlock_post_flush(ctx);
|
||||
__io_cq_unlock_post(ctx);
|
||||
|
||||
if (!wq_list_empty(&ctx->submit_state.compl_reqs)) {
|
||||
io_free_batch_list(ctx, state->compl_reqs.first);
|
||||
@ -1578,22 +1565,6 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Drop reference to request, return next in chain (if there is one) if this
|
||||
* was the last reference to this request.
|
||||
*/
|
||||
static inline struct io_kiocb *io_put_req_find_next(struct io_kiocb *req)
|
||||
{
|
||||
struct io_kiocb *nxt = NULL;
|
||||
|
||||
if (req_ref_put_and_test(req)) {
|
||||
if (unlikely(req->flags & IO_REQ_LINK_FLAGS))
|
||||
nxt = io_req_find_next(req);
|
||||
io_free_req(req);
|
||||
}
|
||||
return nxt;
|
||||
}
|
||||
|
||||
static unsigned io_cqring_events(struct io_ring_ctx *ctx)
|
||||
{
|
||||
/* See comment at the top of this file */
|
||||
@ -1758,54 +1729,14 @@ static void io_iopoll_req_issued(struct io_kiocb *req, unsigned int issue_flags)
|
||||
}
|
||||
}
|
||||
|
||||
static bool io_bdev_nowait(struct block_device *bdev)
|
||||
{
|
||||
return !bdev || bdev_nowait(bdev);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we tracked the file through the SCM inflight mechanism, we could support
|
||||
* any file. For now, just ensure that anything potentially problematic is done
|
||||
* inline.
|
||||
*/
|
||||
static bool __io_file_supports_nowait(struct file *file, umode_t mode)
|
||||
{
|
||||
if (S_ISBLK(mode)) {
|
||||
if (IS_ENABLED(CONFIG_BLOCK) &&
|
||||
io_bdev_nowait(I_BDEV(file->f_mapping->host)))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
if (S_ISSOCK(mode))
|
||||
return true;
|
||||
if (S_ISREG(mode)) {
|
||||
if (IS_ENABLED(CONFIG_BLOCK) &&
|
||||
io_bdev_nowait(file->f_inode->i_sb->s_bdev) &&
|
||||
!io_is_uring_fops(file))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/* any ->read/write should understand O_NONBLOCK */
|
||||
if (file->f_flags & O_NONBLOCK)
|
||||
return true;
|
||||
return file->f_mode & FMODE_NOWAIT;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we tracked the file through the SCM inflight mechanism, we could support
|
||||
* any file. For now, just ensure that anything potentially problematic is done
|
||||
* inline.
|
||||
*/
|
||||
unsigned int io_file_get_flags(struct file *file)
|
||||
{
|
||||
umode_t mode = file_inode(file)->i_mode;
|
||||
unsigned int res = 0;
|
||||
|
||||
if (S_ISREG(mode))
|
||||
res |= FFS_ISREG;
|
||||
if (__io_file_supports_nowait(file, mode))
|
||||
res |= FFS_NOWAIT;
|
||||
if (S_ISREG(file_inode(file)->i_mode))
|
||||
res |= REQ_F_ISREG;
|
||||
if ((file->f_flags & O_NONBLOCK) || (file->f_mode & FMODE_NOWAIT))
|
||||
res |= REQ_F_SUPPORT_NOWAIT;
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -1891,39 +1822,6 @@ queue:
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
}
|
||||
|
||||
static void io_clean_op(struct io_kiocb *req)
|
||||
{
|
||||
if (req->flags & REQ_F_BUFFER_SELECTED) {
|
||||
spin_lock(&req->ctx->completion_lock);
|
||||
io_put_kbuf_comp(req);
|
||||
spin_unlock(&req->ctx->completion_lock);
|
||||
}
|
||||
|
||||
if (req->flags & REQ_F_NEED_CLEANUP) {
|
||||
const struct io_cold_def *def = &io_cold_defs[req->opcode];
|
||||
|
||||
if (def->cleanup)
|
||||
def->cleanup(req);
|
||||
}
|
||||
if ((req->flags & REQ_F_POLLED) && req->apoll) {
|
||||
kfree(req->apoll->double_poll);
|
||||
kfree(req->apoll);
|
||||
req->apoll = NULL;
|
||||
}
|
||||
if (req->flags & REQ_F_INFLIGHT) {
|
||||
struct io_uring_task *tctx = req->task->io_uring;
|
||||
|
||||
atomic_dec(&tctx->inflight_tracked);
|
||||
}
|
||||
if (req->flags & REQ_F_CREDS)
|
||||
put_cred(req->creds);
|
||||
if (req->flags & REQ_F_ASYNC_DATA) {
|
||||
kfree(req->async_data);
|
||||
req->async_data = NULL;
|
||||
}
|
||||
req->flags &= ~IO_REQ_CLEAN_FLAGS;
|
||||
}
|
||||
|
||||
static bool io_assign_file(struct io_kiocb *req, const struct io_issue_def *def,
|
||||
unsigned int issue_flags)
|
||||
{
|
||||
@ -1986,9 +1884,14 @@ int io_poll_issue(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
struct io_wq_work *io_wq_free_work(struct io_wq_work *work)
|
||||
{
|
||||
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
|
||||
struct io_kiocb *nxt = NULL;
|
||||
|
||||
req = io_put_req_find_next(req);
|
||||
return req ? &req->work : NULL;
|
||||
if (req_ref_put_and_test(req)) {
|
||||
if (req->flags & IO_REQ_LINK_FLAGS)
|
||||
nxt = io_req_find_next(req);
|
||||
io_free_req(req);
|
||||
}
|
||||
return nxt ? &nxt->work : NULL;
|
||||
}
|
||||
|
||||
void io_wq_submit_work(struct io_wq_work *work)
|
||||
@ -2060,19 +1963,17 @@ inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
|
||||
unsigned int issue_flags)
|
||||
{
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
struct io_fixed_file *slot;
|
||||
struct file *file = NULL;
|
||||
unsigned long file_ptr;
|
||||
|
||||
io_ring_submit_lock(ctx, issue_flags);
|
||||
|
||||
if (unlikely((unsigned int)fd >= ctx->nr_user_files))
|
||||
goto out;
|
||||
fd = array_index_nospec(fd, ctx->nr_user_files);
|
||||
file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr;
|
||||
file = (struct file *) (file_ptr & FFS_MASK);
|
||||
file_ptr &= ~FFS_MASK;
|
||||
/* mask in overlapping REQ_F and FFS bits */
|
||||
req->flags |= (file_ptr << REQ_F_SUPPORT_NOWAIT_BIT);
|
||||
slot = io_fixed_file_slot(&ctx->file_table, fd);
|
||||
file = io_slot_file(slot);
|
||||
req->flags |= io_slot_flags(slot);
|
||||
io_req_set_rsrc_node(req, ctx, 0);
|
||||
out:
|
||||
io_ring_submit_unlock(ctx, issue_flags);
|
||||
@ -2709,11 +2610,96 @@ static void io_mem_free(void *ptr)
|
||||
free_compound_page(page);
|
||||
}
|
||||
|
||||
static void io_pages_free(struct page ***pages, int npages)
|
||||
{
|
||||
struct page **page_array;
|
||||
int i;
|
||||
|
||||
if (!pages)
|
||||
return;
|
||||
page_array = *pages;
|
||||
for (i = 0; i < npages; i++)
|
||||
unpin_user_page(page_array[i]);
|
||||
kvfree(page_array);
|
||||
*pages = NULL;
|
||||
}
|
||||
|
||||
/*
 * Pin the user memory at @uaddr (@size bytes) and return a kernel virtual
 * address for it.
 *
 * On success the pinned page array is stored in *@pages and its length in
 * *@npages; the caller releases both with io_pages_free(). On failure
 * *@npages is 0, nothing stays pinned and an ERR_PTR() is returned:
 *   -EINVAL  @uaddr is not page aligned, @size is 0, the range needs more
 *            than USHRT_MAX pages, or a pinned page is in highmem
 *   -ENOMEM  the page array could not be allocated
 *   -EFAULT  not all pages could be pinned, or the pinned pages are not
 *            contiguous in the kernel address space
 *   other    negative error returned by pin_user_pages_fast()
 */
static void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
			    unsigned long uaddr, size_t size)
{
	struct page **page_array;
	unsigned int nr_pages, i;
	void *page_addr;
	int pinned, ret;

	*npages = 0;

	if (uaddr & (PAGE_SIZE - 1) || !size)
		return ERR_PTR(-EINVAL);

	nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (nr_pages > USHRT_MAX)
		return ERR_PTR(-EINVAL);
	page_array = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL);
	if (!page_array)
		return ERR_PTR(-ENOMEM);

	pinned = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
				     page_array);
	if (pinned != nr_pages) {
		ret = pinned < 0 ? pinned : -EFAULT;
		goto err;
	}

	/*
	 * The returned address is used as one linear mapping, so every page
	 * must directly follow the previous one in the kernel address space.
	 * A single normal page trivially qualifies; for anything larger,
	 * userspace should use a huge page. That's the only way to guarantee
	 * that we get contiguous memory, outside of just being lucky or
	 * (currently) having low memory fragmentation.
	 *
	 * Comparing only the first and last struct page pointers does not
	 * establish this, so walk the whole array. Highmem pages are refused
	 * up front since the address computed for them below would not be
	 * usable.
	 */
	page_addr = page_address(page_array[0]);
	for (i = 0; i < nr_pages; i++) {
		if (PageHighMem(page_array[i])) {
			ret = -EINVAL;
			goto err;
		}
		if (page_address(page_array[i]) != page_addr) {
			ret = -EFAULT;
			goto err;
		}
		page_addr += PAGE_SIZE;
	}

	*pages = page_array;
	*npages = nr_pages;
	return page_to_virt(page_array[0]);

err:
	/* drop only the pins we actually hold, then the array itself */
	io_pages_free(&page_array, pinned > 0 ? pinned : 0);
	return ERR_PTR(ret);
}
|
||||
|
||||
static void *io_rings_map(struct io_ring_ctx *ctx, unsigned long uaddr,
|
||||
size_t size)
|
||||
{
|
||||
return __io_uaddr_map(&ctx->ring_pages, &ctx->n_ring_pages, uaddr,
|
||||
size);
|
||||
}
|
||||
|
||||
static void *io_sqes_map(struct io_ring_ctx *ctx, unsigned long uaddr,
|
||||
size_t size)
|
||||
{
|
||||
return __io_uaddr_map(&ctx->sqe_pages, &ctx->n_sqe_pages, uaddr,
|
||||
size);
|
||||
}
|
||||
|
||||
static void io_rings_free(struct io_ring_ctx *ctx)
|
||||
{
|
||||
if (!(ctx->flags & IORING_SETUP_NO_MMAP)) {
|
||||
io_mem_free(ctx->rings);
|
||||
io_mem_free(ctx->sq_sqes);
|
||||
ctx->rings = NULL;
|
||||
ctx->sq_sqes = NULL;
|
||||
} else {
|
||||
io_pages_free(&ctx->ring_pages, ctx->n_ring_pages);
|
||||
io_pages_free(&ctx->sqe_pages, ctx->n_sqe_pages);
|
||||
}
|
||||
}
|
||||
|
||||
static void *io_mem_alloc(size_t size)
|
||||
{
|
||||
gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;
|
||||
void *ret;
|
||||
|
||||
return (void *) __get_free_pages(gfp, get_order(size));
|
||||
ret = (void *) __get_free_pages(gfp, get_order(size));
|
||||
if (ret)
|
||||
return ret;
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
static unsigned long rings_size(struct io_ring_ctx *ctx, unsigned int sq_entries,
|
||||
@ -2869,8 +2855,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
|
||||
mmdrop(ctx->mm_account);
|
||||
ctx->mm_account = NULL;
|
||||
}
|
||||
io_mem_free(ctx->rings);
|
||||
io_mem_free(ctx->sq_sqes);
|
||||
io_rings_free(ctx);
|
||||
|
||||
percpu_ref_exit(&ctx->refs);
|
||||
free_uid(ctx->user);
|
||||
@ -3050,7 +3035,18 @@ static __cold void io_ring_exit_work(struct work_struct *work)
|
||||
/* there is little hope left, don't run it too often */
|
||||
interval = HZ * 60;
|
||||
}
|
||||
} while (!wait_for_completion_timeout(&ctx->ref_comp, interval));
|
||||
/*
|
||||
* This is really an uninterruptible wait, as it has to be
|
||||
* complete. But it's also run from a kworker, which doesn't
|
||||
* take signals, so it's fine to make it interruptible. This
|
||||
* avoids scenarios where we knowingly can wait much longer
|
||||
* on completions, for example if someone does a SIGSTOP on
|
||||
* a task that needs to finish task_work to make this loop
|
||||
* complete. That's a synthetic situation that should not
|
||||
* cause a stuck task backtrace, and hence a potential panic
|
||||
* on stuck tasks if that is enabled.
|
||||
*/
|
||||
} while (!wait_for_completion_interruptible_timeout(&ctx->ref_comp, interval));
|
||||
|
||||
init_completion(&exit.completion);
|
||||
init_task_work(&exit.task_work, io_tctx_exit_cb);
|
||||
@ -3074,7 +3070,12 @@ static __cold void io_ring_exit_work(struct work_struct *work)
|
||||
continue;
|
||||
|
||||
mutex_unlock(&ctx->uring_lock);
|
||||
wait_for_completion(&exit.completion);
|
||||
/*
|
||||
* See comment above for
|
||||
* wait_for_completion_interruptible_timeout() on why this
|
||||
* wait is marked as interruptible.
|
||||
*/
|
||||
wait_for_completion_interruptible(&exit.completion);
|
||||
mutex_lock(&ctx->uring_lock);
|
||||
}
|
||||
mutex_unlock(&ctx->uring_lock);
|
||||
@ -3348,6 +3349,10 @@ static void *io_uring_validate_mmap_request(struct file *file,
|
||||
struct page *page;
|
||||
void *ptr;
|
||||
|
||||
/* Don't allow mmap if the ring was setup without it */
|
||||
if (ctx->flags & IORING_SETUP_NO_MMAP)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
switch (offset & IORING_OFF_MMAP_MASK) {
|
||||
case IORING_OFF_SQ_RING:
|
||||
case IORING_OFF_CQ_RING:
|
||||
@ -3673,6 +3678,7 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
|
||||
{
|
||||
struct io_rings *rings;
|
||||
size_t size, sq_array_offset;
|
||||
void *ptr;
|
||||
|
||||
/* make sure these are sane, as we already accounted them */
|
||||
ctx->sq_entries = p->sq_entries;
|
||||
@ -3682,9 +3688,13 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
|
||||
if (size == SIZE_MAX)
|
||||
return -EOVERFLOW;
|
||||
|
||||
rings = io_mem_alloc(size);
|
||||
if (!rings)
|
||||
return -ENOMEM;
|
||||
if (!(ctx->flags & IORING_SETUP_NO_MMAP))
|
||||
rings = io_mem_alloc(size);
|
||||
else
|
||||
rings = io_rings_map(ctx, p->cq_off.user_addr, size);
|
||||
|
||||
if (IS_ERR(rings))
|
||||
return PTR_ERR(rings);
|
||||
|
||||
ctx->rings = rings;
|
||||
ctx->sq_array = (u32 *)((char *)rings + sq_array_offset);
|
||||
@ -3698,34 +3708,31 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
|
||||
else
|
||||
size = array_size(sizeof(struct io_uring_sqe), p->sq_entries);
|
||||
if (size == SIZE_MAX) {
|
||||
io_mem_free(ctx->rings);
|
||||
ctx->rings = NULL;
|
||||
io_rings_free(ctx);
|
||||
return -EOVERFLOW;
|
||||
}
|
||||
|
||||
ctx->sq_sqes = io_mem_alloc(size);
|
||||
if (!ctx->sq_sqes) {
|
||||
io_mem_free(ctx->rings);
|
||||
ctx->rings = NULL;
|
||||
return -ENOMEM;
|
||||
if (!(ctx->flags & IORING_SETUP_NO_MMAP))
|
||||
ptr = io_mem_alloc(size);
|
||||
else
|
||||
ptr = io_sqes_map(ctx, p->sq_off.user_addr, size);
|
||||
|
||||
if (IS_ERR(ptr)) {
|
||||
io_rings_free(ctx);
|
||||
return PTR_ERR(ptr);
|
||||
}
|
||||
|
||||
ctx->sq_sqes = ptr;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file)
|
||||
static int io_uring_install_fd(struct file *file)
|
||||
{
|
||||
int ret, fd;
|
||||
int fd;
|
||||
|
||||
fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
|
||||
if (fd < 0)
|
||||
return fd;
|
||||
|
||||
ret = __io_uring_add_tctx_node(ctx);
|
||||
if (ret) {
|
||||
put_unused_fd(fd);
|
||||
return ret;
|
||||
}
|
||||
fd_install(fd, file);
|
||||
return fd;
|
||||
}
|
||||
@ -3765,6 +3772,7 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
|
||||
struct io_uring_params __user *params)
|
||||
{
|
||||
struct io_ring_ctx *ctx;
|
||||
struct io_uring_task *tctx;
|
||||
struct file *file;
|
||||
int ret;
|
||||
|
||||
@ -3776,6 +3784,10 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
|
||||
entries = IORING_MAX_ENTRIES;
|
||||
}
|
||||
|
||||
if ((p->flags & IORING_SETUP_REGISTERED_FD_ONLY)
|
||||
&& !(p->flags & IORING_SETUP_NO_MMAP))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Use twice as many entries for the CQ ring. It's possible for the
|
||||
* application to drive a higher depth than the size of the SQ ring,
|
||||
@ -3887,7 +3899,6 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
memset(&p->sq_off, 0, sizeof(p->sq_off));
|
||||
p->sq_off.head = offsetof(struct io_rings, sq.head);
|
||||
p->sq_off.tail = offsetof(struct io_rings, sq.tail);
|
||||
p->sq_off.ring_mask = offsetof(struct io_rings, sq_ring_mask);
|
||||
@ -3895,8 +3906,10 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
|
||||
p->sq_off.flags = offsetof(struct io_rings, sq_flags);
|
||||
p->sq_off.dropped = offsetof(struct io_rings, sq_dropped);
|
||||
p->sq_off.array = (char *)ctx->sq_array - (char *)ctx->rings;
|
||||
p->sq_off.resv1 = 0;
|
||||
if (!(ctx->flags & IORING_SETUP_NO_MMAP))
|
||||
p->sq_off.user_addr = 0;
|
||||
|
||||
memset(&p->cq_off, 0, sizeof(p->cq_off));
|
||||
p->cq_off.head = offsetof(struct io_rings, cq.head);
|
||||
p->cq_off.tail = offsetof(struct io_rings, cq.tail);
|
||||
p->cq_off.ring_mask = offsetof(struct io_rings, cq_ring_mask);
|
||||
@ -3904,6 +3917,9 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
|
||||
p->cq_off.overflow = offsetof(struct io_rings, cq_overflow);
|
||||
p->cq_off.cqes = offsetof(struct io_rings, cqes);
|
||||
p->cq_off.flags = offsetof(struct io_rings, cq_flags);
|
||||
p->cq_off.resv1 = 0;
|
||||
if (!(ctx->flags & IORING_SETUP_NO_MMAP))
|
||||
p->cq_off.user_addr = 0;
|
||||
|
||||
p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
|
||||
IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
|
||||
@ -3928,22 +3944,30 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = __io_uring_add_tctx_node(ctx);
|
||||
if (ret)
|
||||
goto err_fput;
|
||||
tctx = current->io_uring;
|
||||
|
||||
/*
|
||||
* Install ring fd as the very last thing, so we don't risk someone
|
||||
* having closed it before we finish setup
|
||||
*/
|
||||
ret = io_uring_install_fd(ctx, file);
|
||||
if (ret < 0) {
|
||||
/* fput will clean it up */
|
||||
fput(file);
|
||||
return ret;
|
||||
}
|
||||
if (p->flags & IORING_SETUP_REGISTERED_FD_ONLY)
|
||||
ret = io_ring_add_registered_file(tctx, file, 0, IO_RINGFD_REG_MAX);
|
||||
else
|
||||
ret = io_uring_install_fd(file);
|
||||
if (ret < 0)
|
||||
goto err_fput;
|
||||
|
||||
trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
|
||||
return ret;
|
||||
err:
|
||||
io_ring_ctx_wait_and_kill(ctx);
|
||||
return ret;
|
||||
err_fput:
|
||||
fput(file);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3969,7 +3993,8 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
|
||||
IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL |
|
||||
IORING_SETUP_COOP_TASKRUN | IORING_SETUP_TASKRUN_FLAG |
|
||||
IORING_SETUP_SQE128 | IORING_SETUP_CQE32 |
|
||||
IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN))
|
||||
IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN |
|
||||
IORING_SETUP_NO_MMAP | IORING_SETUP_REGISTERED_FD_ONLY))
|
||||
return -EINVAL;
|
||||
|
||||
return io_uring_create(entries, &p, params);
|
||||
|
@ -16,9 +16,6 @@
|
||||
#endif
|
||||
|
||||
enum {
|
||||
/* don't use deferred task_work */
|
||||
IOU_F_TWQ_FORCE_NORMAL = 1,
|
||||
|
||||
/*
|
||||
* A hint to not wake right away but delay until there are enough of
|
||||
* tw's queued to match the number of CQEs the task is waiting for.
|
||||
@ -26,7 +23,7 @@ enum {
|
||||
* Must not be used with requests generating more than one CQE.
|
||||
* It's also ignored unless IORING_SETUP_DEFER_TASKRUN is set.
|
||||
*/
|
||||
IOU_F_TWQ_LAZY_WAKE = 2,
|
||||
IOU_F_TWQ_LAZY_WAKE = 1,
|
||||
};
|
||||
|
||||
enum {
|
||||
@ -47,7 +44,7 @@ int io_run_task_work_sig(struct io_ring_ctx *ctx);
|
||||
void io_req_defer_failed(struct io_kiocb *req, s32 res);
|
||||
void io_req_complete_post(struct io_kiocb *req, unsigned issue_flags);
|
||||
bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags);
|
||||
bool io_aux_cqe(struct io_ring_ctx *ctx, bool defer, u64 user_data, s32 res, u32 cflags,
|
||||
bool io_aux_cqe(const struct io_kiocb *req, bool defer, s32 res, u32 cflags,
|
||||
bool allow_overflow);
|
||||
void __io_commit_cqring_flush(struct io_ring_ctx *ctx);
|
||||
|
||||
@ -57,11 +54,6 @@ struct file *io_file_get_normal(struct io_kiocb *req, int fd);
|
||||
struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
|
||||
unsigned issue_flags);
|
||||
|
||||
static inline bool io_req_ffs_set(struct io_kiocb *req)
|
||||
{
|
||||
return req->flags & REQ_F_FIXED_FILE;
|
||||
}
|
||||
|
||||
void __io_req_task_work_add(struct io_kiocb *req, unsigned flags);
|
||||
bool io_is_uring_fops(struct file *file);
|
||||
bool io_alloc_async_data(struct io_kiocb *req);
|
||||
@ -75,6 +67,9 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd);
|
||||
int io_uring_alloc_task_context(struct task_struct *task,
|
||||
struct io_ring_ctx *ctx);
|
||||
|
||||
int io_ring_add_registered_file(struct io_uring_task *tctx, struct file *file,
|
||||
int start, int end);
|
||||
|
||||
int io_poll_issue(struct io_kiocb *req, struct io_tw_state *ts);
|
||||
int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr);
|
||||
int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin);
|
||||
@ -115,8 +110,6 @@ static inline void io_req_task_work_add(struct io_kiocb *req)
|
||||
#define io_for_each_link(pos, head) \
|
||||
for (pos = (head); pos; pos = pos->link)
|
||||
|
||||
void io_cq_unlock_post(struct io_ring_ctx *ctx);
|
||||
|
||||
static inline struct io_uring_cqe *io_get_cqe_overflow(struct io_ring_ctx *ctx,
|
||||
bool overflow)
|
||||
{
|
||||
|
@ -162,14 +162,12 @@ static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_fl
|
||||
struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
struct file *file = NULL;
|
||||
unsigned long file_ptr;
|
||||
int idx = msg->src_fd;
|
||||
|
||||
io_ring_submit_lock(ctx, issue_flags);
|
||||
if (likely(idx < ctx->nr_user_files)) {
|
||||
idx = array_index_nospec(idx, ctx->nr_user_files);
|
||||
file_ptr = io_fixed_file_slot(&ctx->file_table, idx)->file_ptr;
|
||||
file = (struct file *) (file_ptr & FFS_MASK);
|
||||
file = io_file_from_index(&ctx->file_table, idx);
|
||||
if (file)
|
||||
get_file(file);
|
||||
}
|
||||
|
@ -624,9 +624,15 @@ static inline void io_recv_prep_retry(struct io_kiocb *req)
|
||||
* again (for multishot).
|
||||
*/
|
||||
static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
|
||||
unsigned int cflags, bool mshot_finished,
|
||||
struct msghdr *msg, bool mshot_finished,
|
||||
unsigned issue_flags)
|
||||
{
|
||||
unsigned int cflags;
|
||||
|
||||
cflags = io_put_kbuf(req, issue_flags);
|
||||
if (msg->msg_inq && msg->msg_inq != -1U)
|
||||
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
|
||||
|
||||
if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
|
||||
io_req_set_res(req, *ret, cflags);
|
||||
*ret = IOU_OK;
|
||||
@ -634,10 +640,18 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
|
||||
}
|
||||
|
||||
if (!mshot_finished) {
|
||||
if (io_aux_cqe(req->ctx, issue_flags & IO_URING_F_COMPLETE_DEFER,
|
||||
req->cqe.user_data, *ret, cflags | IORING_CQE_F_MORE, true)) {
|
||||
if (io_aux_cqe(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
|
||||
*ret, cflags | IORING_CQE_F_MORE, true)) {
|
||||
io_recv_prep_retry(req);
|
||||
return false;
|
||||
/* Known not-empty or unknown state, retry */
|
||||
if (cflags & IORING_CQE_F_SOCK_NONEMPTY ||
|
||||
msg->msg_inq == -1U)
|
||||
return false;
|
||||
if (issue_flags & IO_URING_F_MULTISHOT)
|
||||
*ret = IOU_ISSUE_SKIP_COMPLETE;
|
||||
else
|
||||
*ret = -EAGAIN;
|
||||
return true;
|
||||
}
|
||||
/* Otherwise stop multishot but use the current result. */
|
||||
}
|
||||
@ -740,7 +754,6 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
|
||||
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
|
||||
struct io_async_msghdr iomsg, *kmsg;
|
||||
struct socket *sock;
|
||||
unsigned int cflags;
|
||||
unsigned flags;
|
||||
int ret, min_ret = 0;
|
||||
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
|
||||
@ -791,6 +804,7 @@ retry_multishot:
|
||||
flags |= MSG_DONTWAIT;
|
||||
|
||||
kmsg->msg.msg_get_inq = 1;
|
||||
kmsg->msg.msg_inq = -1U;
|
||||
if (req->flags & REQ_F_APOLL_MULTISHOT) {
|
||||
ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
|
||||
&mshot_finished);
|
||||
@ -831,11 +845,7 @@ retry_multishot:
|
||||
else
|
||||
io_kbuf_recycle(req, issue_flags);
|
||||
|
||||
cflags = io_put_kbuf(req, issue_flags);
|
||||
if (kmsg->msg.msg_inq)
|
||||
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
|
||||
|
||||
if (!io_recv_finish(req, &ret, cflags, mshot_finished, issue_flags))
|
||||
if (!io_recv_finish(req, &ret, &kmsg->msg, mshot_finished, issue_flags))
|
||||
goto retry_multishot;
|
||||
|
||||
if (mshot_finished) {
|
||||
@ -854,7 +864,6 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags)
|
||||
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
|
||||
struct msghdr msg;
|
||||
struct socket *sock;
|
||||
unsigned int cflags;
|
||||
unsigned flags;
|
||||
int ret, min_ret = 0;
|
||||
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
|
||||
@ -871,6 +880,14 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags)
|
||||
if (unlikely(!sock))
|
||||
return -ENOTSOCK;
|
||||
|
||||
msg.msg_name = NULL;
|
||||
msg.msg_namelen = 0;
|
||||
msg.msg_control = NULL;
|
||||
msg.msg_get_inq = 1;
|
||||
msg.msg_controllen = 0;
|
||||
msg.msg_iocb = NULL;
|
||||
msg.msg_ubuf = NULL;
|
||||
|
||||
retry_multishot:
|
||||
if (io_do_buffer_select(req)) {
|
||||
void __user *buf;
|
||||
@ -885,14 +902,8 @@ retry_multishot:
|
||||
if (unlikely(ret))
|
||||
goto out_free;
|
||||
|
||||
msg.msg_name = NULL;
|
||||
msg.msg_namelen = 0;
|
||||
msg.msg_control = NULL;
|
||||
msg.msg_get_inq = 1;
|
||||
msg.msg_inq = -1U;
|
||||
msg.msg_flags = 0;
|
||||
msg.msg_controllen = 0;
|
||||
msg.msg_iocb = NULL;
|
||||
msg.msg_ubuf = NULL;
|
||||
|
||||
flags = sr->msg_flags;
|
||||
if (force_nonblock)
|
||||
@ -932,11 +943,7 @@ out_free:
|
||||
else
|
||||
io_kbuf_recycle(req, issue_flags);
|
||||
|
||||
cflags = io_put_kbuf(req, issue_flags);
|
||||
if (msg.msg_inq)
|
||||
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
|
||||
|
||||
if (!io_recv_finish(req, &ret, cflags, ret <= 0, issue_flags))
|
||||
if (!io_recv_finish(req, &ret, &msg, ret <= 0, issue_flags))
|
||||
goto retry_multishot;
|
||||
|
||||
return ret;
|
||||
@ -1308,7 +1315,6 @@ int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
|
||||
int io_accept(struct io_kiocb *req, unsigned int issue_flags)
|
||||
{
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
|
||||
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
|
||||
unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
|
||||
@ -1358,8 +1364,8 @@ retry:
|
||||
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (io_aux_cqe(ctx, issue_flags & IO_URING_F_COMPLETE_DEFER,
|
||||
req->cqe.user_data, ret, IORING_CQE_F_MORE, true))
|
||||
if (io_aux_cqe(req, issue_flags & IO_URING_F_COMPLETE_DEFER, ret,
|
||||
IORING_CQE_F_MORE, true))
|
||||
goto retry;
|
||||
|
||||
return -ECANCELED;
|
||||
|
@ -300,8 +300,8 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
__poll_t mask = mangle_poll(req->cqe.res &
|
||||
req->apoll_events);
|
||||
|
||||
if (!io_aux_cqe(req->ctx, ts->locked, req->cqe.user_data,
|
||||
mask, IORING_CQE_F_MORE, false)) {
|
||||
if (!io_aux_cqe(req, ts->locked, mask,
|
||||
IORING_CQE_F_MORE, false)) {
|
||||
io_req_set_res(req, mask, 0);
|
||||
return IOU_POLL_REMOVE_POLL_USE_RES;
|
||||
}
|
||||
@ -326,7 +326,7 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
return IOU_POLL_NO_ACTION;
|
||||
}
|
||||
|
||||
static void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
{
|
||||
int ret;
|
||||
|
||||
|
@ -38,3 +38,5 @@ bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
|
||||
bool cancel_all);
|
||||
|
||||
void io_apoll_cache_free(struct io_cache_entry *entry);
|
||||
|
||||
void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts);
|
||||
|
@ -354,7 +354,6 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
|
||||
__s32 __user *fds = u64_to_user_ptr(up->data);
|
||||
struct io_rsrc_data *data = ctx->file_data;
|
||||
struct io_fixed_file *file_slot;
|
||||
struct file *file;
|
||||
int fd, i, err = 0;
|
||||
unsigned int done;
|
||||
|
||||
@ -382,15 +381,16 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
|
||||
file_slot = io_fixed_file_slot(&ctx->file_table, i);
|
||||
|
||||
if (file_slot->file_ptr) {
|
||||
file = (struct file *)(file_slot->file_ptr & FFS_MASK);
|
||||
err = io_queue_rsrc_removal(data, i, file);
|
||||
err = io_queue_rsrc_removal(data, i,
|
||||
io_slot_file(file_slot));
|
||||
if (err)
|
||||
break;
|
||||
file_slot->file_ptr = 0;
|
||||
io_file_bitmap_clear(&ctx->file_table, i);
|
||||
}
|
||||
if (fd != -1) {
|
||||
file = fget(fd);
|
||||
struct file *file = fget(fd);
|
||||
|
||||
if (!file) {
|
||||
err = -EBADF;
|
||||
break;
|
||||
|
@ -283,7 +283,7 @@ static inline int io_fixup_rw_res(struct io_kiocb *req, long res)
|
||||
return res;
|
||||
}
|
||||
|
||||
static void io_req_rw_complete(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
void io_req_rw_complete(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
{
|
||||
io_req_io_end(req);
|
||||
|
||||
@ -666,8 +666,8 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode)
|
||||
if (unlikely(!file || !(file->f_mode & mode)))
|
||||
return -EBADF;
|
||||
|
||||
if (!io_req_ffs_set(req))
|
||||
req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT;
|
||||
if (!(req->flags & REQ_F_FIXED_FILE))
|
||||
req->flags |= io_file_get_flags(file);
|
||||
|
||||
kiocb->ki_flags = file->f_iocb_flags;
|
||||
ret = kiocb_set_rw_flags(kiocb, rw->flags);
|
||||
|
@ -22,3 +22,4 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags);
|
||||
int io_writev_prep_async(struct io_kiocb *req);
|
||||
void io_readv_writev_cleanup(struct io_kiocb *req);
|
||||
void io_rw_fail(struct io_kiocb *req);
|
||||
void io_req_rw_complete(struct io_kiocb *req, struct io_tw_state *ts);
|
||||
|
@ -208,29 +208,38 @@ void io_uring_unreg_ringfd(void)
|
||||
}
|
||||
}
|
||||
|
||||
int io_ring_add_registered_file(struct io_uring_task *tctx, struct file *file,
|
||||
int start, int end)
|
||||
{
|
||||
int offset;
|
||||
for (offset = start; offset < end; offset++) {
|
||||
offset = array_index_nospec(offset, IO_RINGFD_REG_MAX);
|
||||
if (tctx->registered_rings[offset])
|
||||
continue;
|
||||
|
||||
tctx->registered_rings[offset] = file;
|
||||
return offset;
|
||||
}
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
static int io_ring_add_registered_fd(struct io_uring_task *tctx, int fd,
|
||||
int start, int end)
|
||||
{
|
||||
struct file *file;
|
||||
int offset;
|
||||
|
||||
for (offset = start; offset < end; offset++) {
|
||||
offset = array_index_nospec(offset, IO_RINGFD_REG_MAX);
|
||||
if (tctx->registered_rings[offset])
|
||||
continue;
|
||||
|
||||
file = fget(fd);
|
||||
if (!file) {
|
||||
return -EBADF;
|
||||
} else if (!io_is_uring_fops(file)) {
|
||||
fput(file);
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
tctx->registered_rings[offset] = file;
|
||||
return offset;
|
||||
file = fget(fd);
|
||||
if (!file) {
|
||||
return -EBADF;
|
||||
} else if (!io_is_uring_fops(file)) {
|
||||
fput(file);
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
return -EBUSY;
|
||||
offset = io_ring_add_registered_file(tctx, file, start, end);
|
||||
if (offset < 0)
|
||||
fput(file);
|
||||
return offset;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -73,8 +73,8 @@ static void io_timeout_complete(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
|
||||
if (!io_timeout_finish(timeout, data)) {
|
||||
bool filled;
|
||||
filled = io_aux_cqe(ctx, ts->locked, req->cqe.user_data, -ETIME,
|
||||
IORING_CQE_F_MORE, false);
|
||||
filled = io_aux_cqe(req, ts->locked, -ETIME, IORING_CQE_F_MORE,
|
||||
false);
|
||||
if (filled) {
|
||||
/* re-arm timer */
|
||||
spin_lock_irq(&ctx->timeout_lock);
|
||||
@ -594,7 +594,7 @@ int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
|
||||
goto add;
|
||||
}
|
||||
|
||||
tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
|
||||
tail = data_race(ctx->cached_cq_tail) - atomic_read(&ctx->cq_timeouts);
|
||||
timeout->target_seq = tail + off;
|
||||
|
||||
/* Update the last seq here in case io_flush_timeouts() hasn't.
|
||||
|
@ -20,16 +20,24 @@ static void io_uring_cmd_work(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
ioucmd->task_work_cb(ioucmd, issue_flags);
|
||||
}
|
||||
|
||||
void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
|
||||
void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned),
|
||||
unsigned flags)
|
||||
{
|
||||
struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
|
||||
|
||||
ioucmd->task_work_cb = task_work_cb;
|
||||
req->io_task_work.func = io_uring_cmd_work;
|
||||
io_req_task_work_add(req);
|
||||
__io_req_task_work_add(req, flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(io_uring_cmd_complete_in_task);
|
||||
EXPORT_SYMBOL_GPL(__io_uring_cmd_do_in_task);
|
||||
|
||||
void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
|
||||
{
|
||||
__io_uring_cmd_do_in_task(ioucmd, task_work_cb, IOU_F_TWQ_LAZY_WAKE);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(io_uring_cmd_do_in_task_lazy);
|
||||
|
||||
static inline void io_req_set_cqe32_extra(struct io_kiocb *req,
|
||||
u64 extra1, u64 extra2)
|
||||
|
@ -471,6 +471,7 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
|
||||
return file;
|
||||
}
|
||||
|
||||
file->f_mode |= FMODE_NOWAIT;
|
||||
sock->file = file;
|
||||
file->private_data = sock;
|
||||
stream_open(SOCK_INODE(sock), file);
|
||||
|
Loading…
Reference in New Issue
Block a user