Pull io_uring thread rewrite from Jens Axboe:
"This converts the io-wq workers to be forked off the tasks in question
instead of being kernel threads that assume various bits of the
original task identity.
This kills > 400 lines of code from io_uring/io-wq, and it's the worst
part of the code. We've had several bugs in this area, and the worry
is always that we could be missing some pieces for file types doing
unusual things (recent /dev/tty example comes to mind, userfaultfd
reads installing file descriptors is another fun one... - both of
which need special handling, and I bet it's not the last weird oddity
we'll find).
With these identical workers, we can have full confidence that we're
never missing anything. That, in itself, is a huge win. Outside of
that, it's also more efficient since we're not wasting space and code
on tracking state, or switching between different states.
I'm sure we're going to find little things to patch up after this
series, but testing has been pretty thorough, from the usual
regression suite to production. Any issue that may crop up should be
manageable.
There's also a nice series of further reductions we can do on top of
this, but I wanted to get the meat of it out sooner rather than later.
The general worry here isn't that it's fundamentally broken. Most of
the little issues we've found over the last week have been related to
just changes in how thread startup/exit is done, since that's the main
difference between using kthreads and these kinds of threads. In fact,
if all goes according to plan, I want to get this into the 5.10 and
5.11 stable branches as well.
That said, the changes outside of io_uring/io-wq are:
- arch setup, simple one-liner to each arch copy_thread()
implementation.
- Removal of net and proc restrictions for io_uring, they are no
longer needed or useful"
* tag 'io_uring-worker.v3-2021-02-25' of git://git.kernel.dk/linux-block: (30 commits)
io-wq: remove now unused IO_WQ_BIT_ERROR
io_uring: fix SQPOLL thread handling over exec
io-wq: improve manager/worker handling over exec
io_uring: ensure SQPOLL startup is triggered before error shutdown
io-wq: make buffered file write hashed work map per-ctx
io-wq: fix race around io_worker grabbing
io-wq: fix races around manager/worker creation and task exit
io_uring: ensure io-wq context is always destroyed for tasks
arch: ensure parisc/powerpc handle PF_IO_WORKER in copy_thread()
io_uring: cleanup ->user usage
io-wq: remove nr_process accounting
io_uring: flag new native workers with IORING_FEAT_NATIVE_WORKERS
net: remove cmsg restriction from io_uring based send/recvmsg calls
Revert "proc: don't allow async path resolution of /proc/self components"
Revert "proc: don't allow async path resolution of /proc/thread-self components"
io_uring: move SQPOLL thread io-wq forked worker
io-wq: make io_wq_fork_thread() available to other users
io-wq: only remove worker from free_list, if it was there
io_uring: remove io_identity
io_uring: remove any grabbing of context
...
149 lines
4.2 KiB
C
149 lines
4.2 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
 *  Chen Liqin <liqin.chen@sunplusct.com>
 *  Lennox Wu <lennox.wu@sunplusct.com>
 * Copyright (C) 2012 Regents of the University of California
 * Copyright (C) 2017 SiFive
 */
|
|
|
|
#include <linux/cpu.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/sched/task_stack.h>
|
|
#include <linux/tick.h>
|
|
#include <linux/ptrace.h>
|
|
#include <linux/uaccess.h>
|
|
|
|
#include <asm/unistd.h>
|
|
#include <asm/processor.h>
|
|
#include <asm/csr.h>
|
|
#include <asm/stacktrace.h>
|
|
#include <asm/string.h>
|
|
#include <asm/switch_to.h>
|
|
#include <asm/thread_info.h>
|
|
|
|
register unsigned long gp_in_global __asm__("gp");
|
|
|
|
#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
|
|
#include <linux/stackprotector.h>
|
|
unsigned long __stack_chk_guard __read_mostly;
|
|
EXPORT_SYMBOL(__stack_chk_guard);
|
|
#endif
|
|
|
|
extern asmlinkage void ret_from_fork(void);
|
|
extern asmlinkage void ret_from_kernel_thread(void);
|
|
|
|
/*
 * arch_cpu_idle() - architecture idle hook.
 *
 * Calls wait_for_interrupt() and, once that returns, re-enables local
 * interrupts with raw_local_irq_enable(). The order of the two calls
 * is significant and must be kept.
 */
void arch_cpu_idle(void)
{
	wait_for_interrupt();

	/* Interrupts are switched back on only after the wait has returned. */
	raw_local_irq_enable();
}
|
|
|
|
void __show_regs(struct pt_regs *regs)
|
|
{
|
|
show_regs_print_info(KERN_DEFAULT);
|
|
|
|
if (!user_mode(regs)) {
|
|
pr_cont("epc : %pS\n", (void *)regs->epc);
|
|
pr_cont(" ra : %pS\n", (void *)regs->ra);
|
|
}
|
|
|
|
pr_cont("epc : " REG_FMT " ra : " REG_FMT " sp : " REG_FMT "\n",
|
|
regs->epc, regs->ra, regs->sp);
|
|
pr_cont(" gp : " REG_FMT " tp : " REG_FMT " t0 : " REG_FMT "\n",
|
|
regs->gp, regs->tp, regs->t0);
|
|
pr_cont(" t1 : " REG_FMT " t2 : " REG_FMT " s0 : " REG_FMT "\n",
|
|
regs->t1, regs->t2, regs->s0);
|
|
pr_cont(" s1 : " REG_FMT " a0 : " REG_FMT " a1 : " REG_FMT "\n",
|
|
regs->s1, regs->a0, regs->a1);
|
|
pr_cont(" a2 : " REG_FMT " a3 : " REG_FMT " a4 : " REG_FMT "\n",
|
|
regs->a2, regs->a3, regs->a4);
|
|
pr_cont(" a5 : " REG_FMT " a6 : " REG_FMT " a7 : " REG_FMT "\n",
|
|
regs->a5, regs->a6, regs->a7);
|
|
pr_cont(" s2 : " REG_FMT " s3 : " REG_FMT " s4 : " REG_FMT "\n",
|
|
regs->s2, regs->s3, regs->s4);
|
|
pr_cont(" s5 : " REG_FMT " s6 : " REG_FMT " s7 : " REG_FMT "\n",
|
|
regs->s5, regs->s6, regs->s7);
|
|
pr_cont(" s8 : " REG_FMT " s9 : " REG_FMT " s10: " REG_FMT "\n",
|
|
regs->s8, regs->s9, regs->s10);
|
|
pr_cont(" s11: " REG_FMT " t3 : " REG_FMT " t4 : " REG_FMT "\n",
|
|
regs->s11, regs->t3, regs->t4);
|
|
pr_cont(" t5 : " REG_FMT " t6 : " REG_FMT "\n",
|
|
regs->t5, regs->t6);
|
|
|
|
pr_cont("status: " REG_FMT " badaddr: " REG_FMT " cause: " REG_FMT "\n",
|
|
regs->status, regs->badaddr, regs->cause);
|
|
}
|
|
void show_regs(struct pt_regs *regs)
|
|
{
|
|
__show_regs(regs);
|
|
if (!user_mode(regs))
|
|
dump_backtrace(regs, NULL, KERN_DEFAULT);
|
|
}
|
|
|
|
void start_thread(struct pt_regs *regs, unsigned long pc,
|
|
unsigned long sp)
|
|
{
|
|
regs->status = SR_PIE;
|
|
if (has_fpu) {
|
|
regs->status |= SR_FS_INITIAL;
|
|
/*
|
|
* Restore the initial value to the FP register
|
|
* before starting the user program.
|
|
*/
|
|
fstate_restore(current, regs);
|
|
}
|
|
regs->epc = pc;
|
|
regs->sp = sp;
|
|
}
|
|
|
|
/*
 * flush_thread() - reset per-thread FP state of the current task.
 *
 * With CONFIG_FPU enabled, calls fstate_off() on the current task's
 * register frame and zeroes current->thread.fstate. Without CONFIG_FPU
 * this function does nothing.
 */
void flush_thread(void)
{
#ifdef CONFIG_FPU
	/*
	 * Reset FPU state and context
	 *	frm: round to nearest, ties to even (IEEE default)
	 *	fflags: accrued exceptions cleared
	 */
	fstate_off(current, task_pt_regs(current));
	/* All-zero fstate gives the frm/fflags defaults described above. */
	memset(&current->thread.fstate, 0, sizeof(current->thread.fstate));
#endif
}
|
|
|
|
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
|
|
{
|
|
fstate_save(src, task_pt_regs(src));
|
|
*dst = *src;
|
|
return 0;
|
|
}
|
|
|
|
int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
|
|
struct task_struct *p, unsigned long tls)
|
|
{
|
|
struct pt_regs *childregs = task_pt_regs(p);
|
|
|
|
/* p->thread holds context to be restored by __switch_to() */
|
|
if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
|
|
/* Kernel thread */
|
|
memset(childregs, 0, sizeof(struct pt_regs));
|
|
childregs->gp = gp_in_global;
|
|
/* Supervisor/Machine, irqs on: */
|
|
childregs->status = SR_PP | SR_PIE;
|
|
|
|
p->thread.ra = (unsigned long)ret_from_kernel_thread;
|
|
p->thread.s[0] = usp; /* fn */
|
|
p->thread.s[1] = arg;
|
|
} else {
|
|
*childregs = *(current_pt_regs());
|
|
if (usp) /* User fork */
|
|
childregs->sp = usp;
|
|
if (clone_flags & CLONE_SETTLS)
|
|
childregs->tp = tls;
|
|
childregs->a0 = 0; /* Return value of fork() */
|
|
p->thread.ra = (unsigned long)ret_from_fork;
|
|
}
|
|
p->thread.sp = (unsigned long)childregs; /* kernel sp */
|
|
return 0;
|
|
}
|