mirror of
https://github.com/torvalds/linux.git
synced 2024-09-20 06:53:04 +00:00
io_uring: user registered clockid for wait timeouts
Add a new registration opcode, IORING_REGISTER_CLOCK, which lets the user select which clock ID to use for CQ wait timeouts. Only a subset of the POSIX clocks is allowed; currently CLOCK_MONOTONIC and CLOCK_BOOTTIME are supported.

Suggested-by: Lewis Baker <lewissbaker@gmail.com>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/98f2bc8a3c36cdf8f0e6a275245e81e903459703.1723039801.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
d29cb3726f
commit
2b8e976b98
|
@ -239,6 +239,9 @@ struct io_ring_ctx {
|
||||||
struct io_rings *rings;
|
struct io_rings *rings;
|
||||||
struct percpu_ref refs;
|
struct percpu_ref refs;
|
||||||
|
|
||||||
|
clockid_t clockid;
|
||||||
|
enum tk_offsets clock_offset;
|
||||||
|
|
||||||
enum task_work_notify_mode notify_method;
|
enum task_work_notify_mode notify_method;
|
||||||
unsigned sq_thread_idle;
|
unsigned sq_thread_idle;
|
||||||
} ____cacheline_aligned_in_smp;
|
} ____cacheline_aligned_in_smp;
|
||||||
|
|
|
@ -596,6 +596,8 @@ enum io_uring_register_op {
|
||||||
IORING_REGISTER_NAPI = 27,
|
IORING_REGISTER_NAPI = 27,
|
||||||
IORING_UNREGISTER_NAPI = 28,
|
IORING_UNREGISTER_NAPI = 28,
|
||||||
|
|
||||||
|
IORING_REGISTER_CLOCK = 29,
|
||||||
|
|
||||||
/* this goes last */
|
/* this goes last */
|
||||||
IORING_REGISTER_LAST,
|
IORING_REGISTER_LAST,
|
||||||
|
|
||||||
|
@ -676,6 +678,11 @@ struct io_uring_restriction {
|
||||||
__u32 resv2[3];
|
__u32 resv2[3];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * Argument passed from userspace with IORING_REGISTER_CLOCK to select the
 * clock a ring uses for its CQ wait timeouts. io_register_clock() copies
 * this structure in and validates it.
 */
struct io_uring_clock_register {
|
||||||
|
__u32 clockid; /* requested clock; io_register_clock() accepts CLOCK_MONOTONIC or CLOCK_BOOTTIME */
|
||||||
|
__u32 __resv[3]; /* reserved; must be all zero or registration fails with -EINVAL */
|
||||||
|
};
|
||||||
|
|
||||||
struct io_uring_buf {
|
struct io_uring_buf {
|
||||||
__u64 addr;
|
__u64 addr;
|
||||||
__u32 len;
|
__u32 len;
|
||||||
|
|
|
@ -2377,7 +2377,8 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
|
||||||
ret = 0;
|
ret = 0;
|
||||||
if (iowq->timeout == KTIME_MAX)
|
if (iowq->timeout == KTIME_MAX)
|
||||||
schedule();
|
schedule();
|
||||||
else if (!schedule_hrtimeout(&iowq->timeout, HRTIMER_MODE_ABS))
|
else if (!schedule_hrtimeout_range_clock(&iowq->timeout, 0,
|
||||||
|
HRTIMER_MODE_ABS, ctx->clockid))
|
||||||
ret = -ETIME;
|
ret = -ETIME;
|
||||||
current->in_iowait = 0;
|
current->in_iowait = 0;
|
||||||
return ret;
|
return ret;
|
||||||
|
@ -2422,7 +2423,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
|
||||||
|
|
||||||
iowq.timeout = timespec64_to_ktime(ts);
|
iowq.timeout = timespec64_to_ktime(ts);
|
||||||
if (!(flags & IORING_ENTER_ABS_TIMER))
|
if (!(flags & IORING_ENTER_ABS_TIMER))
|
||||||
iowq.timeout = ktime_add(iowq.timeout, ktime_get());
|
iowq.timeout = ktime_add(iowq.timeout, io_get_time(ctx));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sig) {
|
if (sig) {
|
||||||
|
@ -3424,6 +3425,9 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
|
||||||
if (!ctx)
|
if (!ctx)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
|
ctx->clockid = CLOCK_MONOTONIC;
|
||||||
|
ctx->clock_offset = 0;
|
||||||
|
|
||||||
if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
|
if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
|
||||||
!(ctx->flags & IORING_SETUP_IOPOLL) &&
|
!(ctx->flags & IORING_SETUP_IOPOLL) &&
|
||||||
!(ctx->flags & IORING_SETUP_SQPOLL))
|
!(ctx->flags & IORING_SETUP_SQPOLL))
|
||||||
|
|
|
@ -437,6 +437,14 @@ static inline bool io_file_can_poll(struct io_kiocb *req)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline ktime_t io_get_time(struct io_ring_ctx *ctx)
|
||||||
|
{
|
||||||
|
if (ctx->clockid == CLOCK_MONOTONIC)
|
||||||
|
return ktime_get();
|
||||||
|
|
||||||
|
return ktime_get_with_offset(ctx->clock_offset);
|
||||||
|
}
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
IO_CHECK_CQ_OVERFLOW_BIT,
|
IO_CHECK_CQ_OVERFLOW_BIT,
|
||||||
IO_CHECK_CQ_DROPPED_BIT,
|
IO_CHECK_CQ_DROPPED_BIT,
|
||||||
|
|
|
@ -283,7 +283,7 @@ void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq)
|
||||||
|
|
||||||
iowq->napi_busy_poll_dt = READ_ONCE(ctx->napi_busy_poll_dt);
|
iowq->napi_busy_poll_dt = READ_ONCE(ctx->napi_busy_poll_dt);
|
||||||
if (iowq->timeout != KTIME_MAX) {
|
if (iowq->timeout != KTIME_MAX) {
|
||||||
ktime_t dt = ktime_sub(iowq->timeout, ktime_get());
|
ktime_t dt = ktime_sub(iowq->timeout, io_get_time(ctx));
|
||||||
|
|
||||||
iowq->napi_busy_poll_dt = min_t(u64, iowq->napi_busy_poll_dt, dt);
|
iowq->napi_busy_poll_dt = min_t(u64, iowq->napi_busy_poll_dt, dt);
|
||||||
}
|
}
|
||||||
|
|
|
@ -335,6 +335,31 @@ err:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int io_register_clock(struct io_ring_ctx *ctx,
|
||||||
|
struct io_uring_clock_register __user *arg)
|
||||||
|
{
|
||||||
|
struct io_uring_clock_register reg;
|
||||||
|
|
||||||
|
if (copy_from_user(®, arg, sizeof(reg)))
|
||||||
|
return -EFAULT;
|
||||||
|
if (memchr_inv(®.__resv, 0, sizeof(reg.__resv)))
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
switch (reg.clockid) {
|
||||||
|
case CLOCK_MONOTONIC:
|
||||||
|
ctx->clock_offset = 0;
|
||||||
|
break;
|
||||||
|
case CLOCK_BOOTTIME:
|
||||||
|
ctx->clock_offset = TK_OFFS_BOOT;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx->clockid = reg.clockid;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
|
static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
|
||||||
void __user *arg, unsigned nr_args)
|
void __user *arg, unsigned nr_args)
|
||||||
__releases(ctx->uring_lock)
|
__releases(ctx->uring_lock)
|
||||||
|
@ -511,6 +536,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
|
||||||
break;
|
break;
|
||||||
ret = io_unregister_napi(ctx, arg);
|
ret = io_unregister_napi(ctx, arg);
|
||||||
break;
|
break;
|
||||||
|
case IORING_REGISTER_CLOCK:
|
||||||
|
ret = -EINVAL;
|
||||||
|
if (!arg || nr_args)
|
||||||
|
break;
|
||||||
|
ret = io_register_clock(ctx, arg);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
ret = -EINVAL;
|
ret = -EINVAL;
|
||||||
break;
|
break;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user