diff --git a/block/blk.h b/block/blk.h index 86affb583eb6..c718e4291db0 100644 --- a/block/blk.h +++ b/block/blk.h @@ -609,6 +609,7 @@ blk_mode_t file_to_blk_mode(struct file *file); int truncate_bdev_range(struct block_device *bdev, blk_mode_t mode, loff_t lstart, loff_t lend); long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg); +int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags); long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg); extern const struct address_space_operations def_blk_aops; diff --git a/block/fops.c b/block/fops.c index 9825c1713a49..8154b10b5abf 100644 --- a/block/fops.c +++ b/block/fops.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "blk.h" static inline struct inode *bdev_file_inode(struct file *file) @@ -873,6 +874,7 @@ const struct file_operations def_blk_fops = { .splice_read = filemap_splice_read, .splice_write = iter_file_splice_write, .fallocate = blkdev_fallocate, + .uring_cmd = blkdev_uring_cmd, .fop_flags = FOP_BUFFER_RASYNC, }; diff --git a/block/ioctl.c b/block/ioctl.c index 6eaa3f20bf34..6554b728bae6 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -11,6 +11,9 @@ #include #include #include +#include +#include +#include #include "blk.h" static int blkpg_do_ioctl(struct block_device *bdev, @@ -748,3 +751,112 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) return ret; } #endif + +struct blk_iou_cmd { + int res; + bool nowait; +}; + +static void blk_cmd_complete(struct io_uring_cmd *cmd, unsigned int issue_flags) +{ + struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); + + if (bic->res == -EAGAIN && bic->nowait) + io_uring_cmd_issue_blocking(cmd); + else + io_uring_cmd_done(cmd, bic->res, 0, issue_flags); +} + +static void bio_cmd_bio_end_io(struct bio *bio) +{ + struct io_uring_cmd *cmd = bio->bi_private; + struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); + + if (unlikely(bio->bi_status) && !bic->res) + bic->res = blk_status_to_errno(bio->bi_status); + + io_uring_cmd_do_in_task_lazy(cmd, blk_cmd_complete); + bio_put(bio); +} + +static int blkdev_cmd_discard(struct io_uring_cmd *cmd, + struct block_device *bdev, + uint64_t start, uint64_t len, bool nowait) +{ + struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); + gfp_t gfp = nowait ? GFP_NOWAIT : GFP_KERNEL; + sector_t sector = start >> SECTOR_SHIFT; + sector_t nr_sects = len >> SECTOR_SHIFT; + struct bio *prev = NULL, *bio; + int err; + + if (!bdev_max_discard_sectors(bdev)) + return -EOPNOTSUPP; + if (!(file_to_blk_mode(cmd->file) & BLK_OPEN_WRITE)) + return -EBADF; + if (bdev_read_only(bdev)) + return -EPERM; + err = blk_validate_byte_range(bdev, start, len); + if (err) + return err; + + err = filemap_invalidate_pages(bdev->bd_mapping, start, + start + len - 1, nowait); + if (err) + return err; + + while (true) { + bio = blk_alloc_discard_bio(bdev, §or, &nr_sects, gfp); + if (!bio) + break; + if (nowait) { + /* + * Don't allow multi-bio non-blocking submissions as + * subsequent bios may fail but we won't get a direct + * indication of that. Normally, the caller should + * retry from a blocking context. + */ + if (unlikely(nr_sects)) { + bio_put(bio); + return -EAGAIN; + } + bio->bi_opf |= REQ_NOWAIT; + } + + prev = bio_chain_and_submit(prev, bio); + } + if (unlikely(!prev)) + return -EAGAIN; + if (unlikely(nr_sects)) + bic->res = -EAGAIN; + + prev->bi_private = cmd; + prev->bi_end_io = bio_cmd_bio_end_io; + submit_bio(prev); + return -EIOCBQUEUED; +} + +int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) +{ + struct block_device *bdev = I_BDEV(cmd->file->f_mapping->host); + struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); + const struct io_uring_sqe *sqe = cmd->sqe; + u32 cmd_op = cmd->cmd_op; + uint64_t start, len; + + if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len || + sqe->rw_flags || sqe->file_index)) + return -EINVAL; + + bic->res = 0; + bic->nowait = issue_flags & IO_URING_F_NONBLOCK; + + start = READ_ONCE(sqe->addr); + len = READ_ONCE(sqe->addr3); + + switch (cmd_op) { + case BLOCK_URING_CMD_DISCARD: + return blkdev_cmd_discard(cmd, bdev, start, len, bic->nowait); + } + return -EINVAL; +} diff --git a/include/uapi/linux/blkdev.h b/include/uapi/linux/blkdev.h new file mode 100644 index 000000000000..66373cd1a83a --- /dev/null +++ b/include/uapi/linux/blkdev.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_BLKDEV_H +#define _UAPI_LINUX_BLKDEV_H + +#include +#include + +/* + * io_uring block file commands, see IORING_OP_URING_CMD. + * It's a different number space from ioctl(), reuse the block's code 0x12. + */ +#define BLOCK_URING_CMD_DISCARD _IO(0x12, 0) + +#endif