diff --git a/fs/io_uring.c b/fs/io_uring.c
index cfad2acd4d86..7426e4f23f9b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4417,6 +4417,29 @@ struct io_poll_table {
 	int error;
 };
 
+static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb)
+{
+	struct task_struct *tsk = req->task;
+	struct io_ring_ctx *ctx = req->ctx;
+	int ret, notify = TWA_RESUME;
+
+	/*
+	 * SQPOLL kernel thread doesn't need notification, just a wakeup.
+	 * If we're not using an eventfd, then TWA_RESUME is always fine,
+	 * as we won't have dependencies between request completions for
+	 * other kernel wait conditions.
+	 */
+	if (ctx->flags & IORING_SETUP_SQPOLL)
+		notify = 0;
+	else if (ctx->cq_ev_fd)
+		notify = TWA_SIGNAL;
+
+	ret = task_work_add(tsk, cb, notify);
+	if (!ret)
+		wake_up_process(tsk);
+	return ret;
+}
+
 static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
 			   __poll_t mask, task_work_func_t func)
 {
@@ -4440,13 +4463,13 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
 	 * of executing it. We can't safely execute it anyway, as we may not
 	 * have the needed state needed for it anyway.
 	 */
-	ret = task_work_add(tsk, &req->task_work, true);
+	ret = io_req_task_work_add(req, &req->task_work);
 	if (unlikely(ret)) {
 		WRITE_ONCE(poll->canceled, true);
 		tsk = io_wq_get_task(req->ctx->io_wq);
-		task_work_add(tsk, &req->task_work, true);
+		task_work_add(tsk, &req->task_work, 0);
+		wake_up_process(tsk);
 	}
-	wake_up_process(tsk);
 	return 1;
 }
 
@@ -6486,15 +6509,23 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 	do {
 		prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
 						TASK_INTERRUPTIBLE);
+		/* make sure we run task_work before checking for signals */
 		if (current->task_works)
 			task_work_run();
-		if (io_should_wake(&iowq, false))
-			break;
-		schedule();
 		if (signal_pending(current)) {
+			if (current->jobctl & JOBCTL_TASK_WORK) {
+				spin_lock_irq(&current->sighand->siglock);
+				current->jobctl &= ~JOBCTL_TASK_WORK;
+				recalc_sigpending();
+				spin_unlock_irq(&current->sighand->siglock);
+				continue;
+			}
 			ret = -EINTR;
 			break;
 		}
+		if (io_should_wake(&iowq, false))
+			break;
+		schedule();
 	} while (1);
 
 	finish_wait(&ctx->wait, &iowq.wq);
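
For reference, here is a rough userspace sketch of the workload shape the
io_uring hunks above are about, assuming liburing is available (this program
is illustrative only and is not part of the patch): the task sleeps in
io_cqring_wait() via io_uring_enter(), while the poll completion it is
waiting for can only be posted by task_work running in that same task's
context. With an eventfd registered (ctx->cq_ev_fd set), the work is now
queued with TWA_SIGNAL, so the sleeping task is forced out to run it rather
than sleeping through a TWA_RESUME notification.

/*
 * Hedged sketch, not from the patch: poll a pipe through io_uring with an
 * eventfd registered on the ring. The poll completion is posted via
 * task_work in the waiting task's context; TWA_SIGNAL ensures a task
 * blocked in io_uring_enter(GETEVENTS) wakes up to run that work.
 */
#include <liburing.h>
#include <sys/eventfd.h>
#include <poll.h>
#include <unistd.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_cqe *cqe;
	int efd = eventfd(0, 0);
	int pfd[2];

	if (pipe(pfd) < 0 || io_uring_queue_init(8, &ring, 0) < 0)
		return 1;
	io_uring_register_eventfd(&ring, efd);	/* sets ctx->cq_ev_fd */

	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
	io_uring_prep_poll_add(sqe, pfd[0], POLLIN);
	io_uring_submit(&ring);

	write(pfd[1], "x", 1);	/* triggers the poll wakeup */

	/* blocks in io_cqring_wait(); the completion arrives via task_work */
	io_uring_wait_cqe(&ring, &cqe);
	io_uring_cqe_seen(&ring, cqe);
	io_uring_queue_exit(&ring);
	return 0;
}
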
diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h
index fa067de9f1a9..d2b4204ba4d3 100644
--- a/include/linux/sched/jobctl.h
+++ b/include/linux/sched/jobctl.h
@@ -19,6 +19,7 @@ struct task_struct;
 #define JOBCTL_TRAPPING_BIT	21	/* switching to TRACED */
 #define JOBCTL_LISTENING_BIT	22	/* ptracer is listening for events */
 #define JOBCTL_TRAP_FREEZE_BIT	23	/* trap for cgroup freezer */
+#define JOBCTL_TASK_WORK_BIT	24	/* set by TWA_SIGNAL */
 
 #define JOBCTL_STOP_DEQUEUED	(1UL << JOBCTL_STOP_DEQUEUED_BIT)
 #define JOBCTL_STOP_PENDING	(1UL << JOBCTL_STOP_PENDING_BIT)
@@ -28,9 +29,10 @@ struct task_struct;
 #define JOBCTL_TRAPPING		(1UL << JOBCTL_TRAPPING_BIT)
 #define JOBCTL_LISTENING	(1UL << JOBCTL_LISTENING_BIT)
 #define JOBCTL_TRAP_FREEZE	(1UL << JOBCTL_TRAP_FREEZE_BIT)
+#define JOBCTL_TASK_WORK	(1UL << JOBCTL_TASK_WORK_BIT)
 
 #define JOBCTL_TRAP_MASK	(JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
-#define JOBCTL_PENDING_MASK	(JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
+#define JOBCTL_PENDING_MASK	(JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK | JOBCTL_TASK_WORK)
 
 extern bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask);
 extern void task_clear_jobctl_trapping(struct task_struct *task);
diff --git a/include/linux/task_work.h b/include/linux/task_work.h
index bd9a6a91c097..0fb93aafa478 100644
--- a/include/linux/task_work.h
+++ b/include/linux/task_work.h
@@ -13,7 +13,10 @@ init_task_work(struct callback_head *twork, task_work_func_t func)
 	twork->func = func;
 }
 
-int task_work_add(struct task_struct *task, struct callback_head *twork, bool);
+#define TWA_RESUME	1
+#define TWA_SIGNAL	2
+int task_work_add(struct task_struct *task, struct callback_head *twork, int);
+
 struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t);
 void task_work_run(void);
 
diff --git a/kernel/signal.c b/kernel/signal.c
index 5ca48cc5da76..ee22ec78fd6d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2529,9 +2529,6 @@ bool get_signal(struct ksignal *ksig)
 	struct signal_struct *signal = current->signal;
 	int signr;
 
-	if (unlikely(current->task_works))
-		task_work_run();
-
 	if (unlikely(uprobe_deny_signal()))
 		return false;
 
@@ -2544,6 +2541,13 @@ bool get_signal(struct ksignal *ksig)
 relock:
 	spin_lock_irq(&sighand->siglock);
 
+	current->jobctl &= ~JOBCTL_TASK_WORK;
+	if (unlikely(current->task_works)) {
+		spin_unlock_irq(&sighand->siglock);
+		task_work_run();
+		goto relock;
+	}
+
 	/*
 	 * Every stopped thread goes here after wakeup. Check to see if
 	 * we should notify the parent, prepare_signal(SIGCONT) encodes
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 825f28259a19..5c0848ca1287 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -25,9 +25,10 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */
  * 0 if succeeds or -ESRCH.
  */
 int
-task_work_add(struct task_struct *task, struct callback_head *work, bool notify)
+task_work_add(struct task_struct *task, struct callback_head *work, int notify)
 {
 	struct callback_head *head;
+	unsigned long flags;
 
 	do {
 		head = READ_ONCE(task->task_works);
@@ -36,8 +37,19 @@ task_work_add(struct task_struct *task, struct callback_head *work, bool notify)
 		work->next = head;
 	} while (cmpxchg(&task->task_works, head, work) != head);
 
-	if (notify)
+	switch (notify) {
+	case TWA_RESUME:
 		set_notify_resume(task);
+		break;
+	case TWA_SIGNAL:
+		if (lock_task_sighand(task, &flags)) {
+			task->jobctl |= JOBCTL_TASK_WORK;
+			signal_wake_up(task, 0);
+			unlock_task_sighand(task, &flags);
+		}
+		break;
+	}
+
 	return 0;
 }
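
For in-kernel users, the converted interface looks like the following
minimal sketch (queue_my_work() and my_work_fn() are invented names for
illustration; io_req_task_work_add() above is the real in-tree caller):
TWA_RESUME keeps the old set_notify_resume() behavior and only runs the work
on return to userspace, while TWA_SIGNAL behaves like signal delivery and
also kicks a task that is blocked interruptibly in the kernel.

/* Hypothetical caller, for illustration only. */
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/task_work.h>

static void my_work_fn(struct callback_head *cb)
{
	/* runs in the context of the task it was queued on */
}

static int queue_my_work(struct task_struct *tsk, struct callback_head *cb)
{
	init_task_work(cb, my_work_fn);

	/*
	 * TWA_SIGNAL sets JOBCTL_TASK_WORK under ->sighand->siglock and
	 * calls signal_wake_up(), so even a task sleeping in
	 * TASK_INTERRUPTIBLE state is woken to process the work.
	 */
	return task_work_add(tsk, cb, TWA_SIGNAL);	/* 0 or -ESRCH */
}

On the receiving side, get_signal() now clears JOBCTL_TASK_WORK and runs any
pending work inside the relock loop, which is why the unconditional
task_work_run() at the top of get_signal() could be removed.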