Updates for x86 FRED:

- Enable FRED right after init_mem_mapping() because at that point the
     early IDT fault handler is replaced by the real fault handler. The real
     fault handler retrieves the faulting address from the stack frame and
     not from CR2 when the FRED feature is set. But that obviously only
     works when FRED is enabled in the CPU as well.
 
   - Set SS to __KERNEL_DS when enabling FRED to prevent a corner case where
     ERETS can observe a SS mismatch and raises a #GP.
 -----BEGIN PGP SIGNATURE-----
 
 iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAmbpNZITHHRnbHhAbGlu
 dXRyb25peC5kZQAKCRCmGPVMDXSYobh3EACsU/WhmWG0pjqNs+92i/Hjd5QHRxX8
 WkyB+j0FQ3ZtQ0aqn73G/VxITxCMAE1fwC2iERlN/9eXjGXcwxeaM9upsMs9gq7v
 HmiOPSixn6hH7ulQ6WzDnM478pSnN4lmaZVY2ll1O3z8r79dW2Kz34zSqQCxDGcQ
 3sCJkHr7F0YClUaYxH/dok68F69aZXhU4V9URE30Ec74hnomYd4VuFkHwuA77rHG
 k81lHxSY9/Ttha91CPiK3/lU+lbehYNNZQ+PzUxkNmm9dlzXI8Vl5JRPJGIlYpWQ
 A9L1ZjV4kZcB+tcXPV1bOW+lVSefGVquAia5RgCyUylIFCOtsR/wCoezS3f17Zhf
 Ry+kfkYwuDgD0IYNVp6L3+Fx0LtBJT3BorhnS7YhhiqvLW0EpGe/bBzzRFntp4oR
 TmRAA3nNn3DBCky3rfGg0TWwqfvy/7c6SPY1Zw1SEmqtDdHB/DyKGt+BVQQ2kqWO
 tCtGAMjcE7Cfgca7mI7wILjY7MFirTQW0js6UL5mw22rhZxKV5S9m7N8KkUnFh3S
 acjQ1nL5ZBQ9cKdEGrLNHQjfSSc9ju7aXsGXm5c+vrqKbMG8+Nj+1cvzxaLL5xVY
 LLKACw5rl0LVXHU5H3IwvS+GMipklrmouikdoI4P8vHMd9GBquR4znO3MzqaLtg2
 F1IBXL07s2SYrw==
 =cKRu
 -----END PGP SIGNATURE-----

Merge tag 'x86-fred-2024-09-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 FRED updates from Thomas Gleixner:

 - Enable FRED right after init_mem_mapping() because at that point the
   early IDT fault handler is replaced by the real fault handler. The
   real fault handler retrieves the faulting address from the stack
   frame and not from CR2 when the FRED feature is set. But that
   obviously only works when FRED is enabled in the CPU as well.

 - Set SS to __KERNEL_DS when enabling FRED to prevent a corner case
   where ERETS can observe a SS mismatch and raises a #GP.

* tag 'x86-fred-2024-09-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/entry: Set FRED RSP0 on return to userspace instead of context switch
  x86/msr: Switch between WRMSRNS and WRMSR with the alternatives mechanism
  x86/entry: Test ti_work for zero before processing individual bits
  x86/fred: Set SS to __KERNEL_DS when enabling FRED
  x86/fred: Enable FRED right after init_mem_mapping()
  x86/fred: Move FRED RSP initialization into a separate function
  x86/fred: Parse cmdline param "fred=" in cpu_parse_early_param()
This commit is contained in:
Linus Torvalds 2024-09-17 14:55:59 +02:00
commit b136021126
11 changed files with 111 additions and 64 deletions

View File

@ -8,6 +8,7 @@
#include <asm/nospec-branch.h>
#include <asm/io_bitmap.h>
#include <asm/fpu/api.h>
#include <asm/fred.h>
/* Check that the stack and regs on entry from user mode are sane. */
static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
@ -44,8 +45,7 @@ static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
}
#define arch_enter_from_user_mode arch_enter_from_user_mode
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
unsigned long ti_work)
static inline void arch_exit_work(unsigned long ti_work)
{
if (ti_work & _TIF_USER_RETURN_NOTIFY)
fire_user_return_notifiers();
@ -56,6 +56,15 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
fpregs_assert_state_consistent();
if (unlikely(ti_work & _TIF_NEED_FPU_LOAD))
switch_fpu_return();
}
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
unsigned long ti_work)
{
if (IS_ENABLED(CONFIG_X86_DEBUG_FPU) || unlikely(ti_work))
arch_exit_work(ti_work);
fred_update_rsp0();
#ifdef CONFIG_COMPAT
/*

View File

@ -36,6 +36,7 @@
#ifdef CONFIG_X86_FRED
#include <linux/kernel.h>
#include <linux/sched/task_stack.h>
#include <asm/ptrace.h>
@ -84,13 +85,33 @@ static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int
}
void cpu_init_fred_exceptions(void);
void cpu_init_fred_rsps(void);
void fred_complete_exception_setup(void);
DECLARE_PER_CPU(unsigned long, fred_rsp0);
static __always_inline void fred_sync_rsp0(unsigned long rsp0)
{
__this_cpu_write(fred_rsp0, rsp0);
}
static __always_inline void fred_update_rsp0(void)
{
unsigned long rsp0 = (unsigned long) task_stack_page(current) + THREAD_SIZE;
if (cpu_feature_enabled(X86_FEATURE_FRED) && (__this_cpu_read(fred_rsp0) != rsp0)) {
wrmsrns(MSR_IA32_FRED_RSP0, rsp0);
__this_cpu_write(fred_rsp0, rsp0);
}
}
#else /* CONFIG_X86_FRED */
static __always_inline unsigned long fred_event_data(struct pt_regs *regs) { return 0; }
static inline void cpu_init_fred_exceptions(void) { }
static inline void cpu_init_fred_rsps(void) { }
static inline void fred_complete_exception_setup(void) { }
static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { }
static inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { }
static inline void fred_sync_rsp0(unsigned long rsp0) { }
static inline void fred_update_rsp0(void) { }
#endif /* CONFIG_X86_FRED */
#endif /* !__ASSEMBLY__ */

View File

@ -99,19 +99,6 @@ static __always_inline void __wrmsr(unsigned int msr, u32 low, u32 high)
: : "c" (msr), "a"(low), "d" (high) : "memory");
}
/*
* WRMSRNS behaves exactly like WRMSR with the only difference being
* that it is not a serializing instruction by default.
*/
static __always_inline void __wrmsrns(u32 msr, u32 low, u32 high)
{
/* Instruction opcode for WRMSRNS; supported in binutils >= 2.40. */
asm volatile("1: .byte 0x0f,0x01,0xc6\n"
"2:\n"
_ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
: : "c" (msr), "a"(low), "d" (high));
}
#define native_rdmsr(msr, val1, val2) \
do { \
u64 __val = __rdmsr((msr)); \
@ -312,9 +299,19 @@ do { \
#endif /* !CONFIG_PARAVIRT_XXL */
/* Instruction opcode for WRMSRNS supported in binutils >= 2.40 */
#define WRMSRNS _ASM_BYTES(0x0f,0x01,0xc6)
/* Non-serializing WRMSR, when available. Falls back to a serializing WRMSR. */
static __always_inline void wrmsrns(u32 msr, u64 val)
{
__wrmsrns(msr, val, val >> 32);
/*
* WRMSR is 2 bytes. WRMSRNS is 3 bytes. Pad WRMSR with a redundant
* DS prefix to avoid a trailing NOP.
*/
asm volatile("1: " ALTERNATIVE("ds wrmsr", WRMSRNS, X86_FEATURE_WRMSRNS)
"2: " _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
: : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)));
}
/*

View File

@ -582,7 +582,8 @@ extern void switch_gdt_and_percpu_base(int);
extern void load_direct_gdt(int);
extern void load_fixmap_gdt(int);
extern void cpu_init(void);
extern void cpu_init_exception_handling(void);
extern void cpu_init_exception_handling(bool boot_cpu);
extern void cpu_init_replace_early_idt(void);
extern void cr4_init(void);
extern void set_task_blockstep(struct task_struct *task, bool on);

View File

@ -70,13 +70,9 @@ static inline void update_task_stack(struct task_struct *task)
#ifdef CONFIG_X86_32
this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0);
#else
if (cpu_feature_enabled(X86_FEATURE_FRED)) {
/* WRMSRNS is a baseline feature for FRED. */
wrmsrns(MSR_IA32_FRED_RSP0, (unsigned long)task_stack_page(task) + THREAD_SIZE);
} else if (cpu_feature_enabled(X86_FEATURE_XENPV)) {
if (!cpu_feature_enabled(X86_FEATURE_FRED) && cpu_feature_enabled(X86_FEATURE_XENPV))
/* Xen PV enters the kernel on the thread stack. */
load_sp0(task_top_of_stack(task));
}
#endif
}

View File

@ -1510,6 +1510,11 @@ static void __init cpu_parse_early_param(void)
if (cmdline_find_option_bool(boot_command_line, "nousershstk"))
setup_clear_cpu_cap(X86_FEATURE_USER_SHSTK);
/* Minimize the gap between FRED is available and available but disabled. */
arglen = cmdline_find_option(boot_command_line, "fred", arg, sizeof(arg));
if (arglen != 2 || strncmp(arg, "on", 2))
setup_clear_cpu_cap(X86_FEATURE_FRED);
arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg));
if (arglen <= 0)
return;
@ -2171,7 +2176,7 @@ static inline void tss_setup_io_bitmap(struct tss_struct *tss)
* Setup everything needed to handle exceptions from the IDT, including the IST
* exceptions which use paranoid_entry().
*/
void cpu_init_exception_handling(void)
void cpu_init_exception_handling(bool boot_cpu)
{
struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
int cpu = raw_smp_processor_id();
@ -2190,10 +2195,23 @@ void cpu_init_exception_handling(void)
/* GHCB needs to be setup to handle #VC. */
setup_ghcb();
if (cpu_feature_enabled(X86_FEATURE_FRED)) {
/* The boot CPU has enabled FRED during early boot */
if (!boot_cpu)
cpu_init_fred_exceptions();
cpu_init_fred_rsps();
} else {
load_current_idt();
}
}
void __init cpu_init_replace_early_idt(void)
{
if (cpu_feature_enabled(X86_FEATURE_FRED))
cpu_init_fred_exceptions();
else
load_current_idt();
idt_setup_early_pf();
}
/*

View File

@ -83,7 +83,6 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_AMX_TILE, X86_FEATURE_XFD },
{ X86_FEATURE_SHSTK, X86_FEATURE_XSAVES },
{ X86_FEATURE_FRED, X86_FEATURE_LKGS },
{ X86_FEATURE_FRED, X86_FEATURE_WRMSRNS },
{}
};

View File

@ -21,17 +21,53 @@
#define FRED_STKLVL(vector, lvl) ((lvl) << (2 * (vector)))
DEFINE_PER_CPU(unsigned long, fred_rsp0);
EXPORT_PER_CPU_SYMBOL(fred_rsp0);
void cpu_init_fred_exceptions(void)
{
/* When FRED is enabled by default, remove this log message */
pr_info("Initialize FRED on CPU%d\n", smp_processor_id());
/*
* If a kernel event is delivered before a CPU goes to user level for
* the first time, its SS is NULL thus NULL is pushed into the SS field
* of the FRED stack frame. But before ERETS is executed, the CPU may
* context switch to another task and go to user level. Then when the
* CPU comes back to kernel mode, SS is changed to __KERNEL_DS. Later
* when ERETS is executed to return from the kernel event handler, a #GP
* fault is generated because SS doesn't match the SS saved in the FRED
* stack frame.
*
* Initialize SS to __KERNEL_DS when enabling FRED to avoid such #GPs.
*/
loadsegment(ss, __KERNEL_DS);
wrmsrl(MSR_IA32_FRED_CONFIG,
/* Reserve for CALL emulation */
FRED_CONFIG_REDZONE |
FRED_CONFIG_INT_STKLVL(0) |
FRED_CONFIG_ENTRYPOINT(asm_fred_entrypoint_user));
wrmsrl(MSR_IA32_FRED_STKLVLS, 0);
wrmsrl(MSR_IA32_FRED_RSP0, 0);
wrmsrl(MSR_IA32_FRED_RSP1, 0);
wrmsrl(MSR_IA32_FRED_RSP2, 0);
wrmsrl(MSR_IA32_FRED_RSP3, 0);
/* Enable FRED */
cr4_set_bits(X86_CR4_FRED);
/* Any further IDT use is a bug */
idt_invalidate();
/* Use int $0x80 for 32-bit system calls in FRED mode */
setup_clear_cpu_cap(X86_FEATURE_SYSENTER32);
setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
}
/* Must be called after setup_cpu_entry_areas() */
void cpu_init_fred_rsps(void)
{
/*
* The purpose of separate stacks for NMI, #DB and #MC *in the kernel*
* (remember that user space faults are always taken on stack level 0)
@ -47,13 +83,4 @@ void cpu_init_fred_exceptions(void)
wrmsrl(MSR_IA32_FRED_RSP1, __this_cpu_ist_top_va(DB));
wrmsrl(MSR_IA32_FRED_RSP2, __this_cpu_ist_top_va(NMI));
wrmsrl(MSR_IA32_FRED_RSP3, __this_cpu_ist_top_va(DF));
/* Enable FRED */
cr4_set_bits(X86_CR4_FRED);
/* Any further IDT use is a bug */
idt_invalidate();
/* Use int $0x80 for 32-bit system calls in FRED mode */
setup_clear_cpu_cap(X86_FEATURE_SYSENTER32);
setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
}

View File

@ -1039,7 +1039,12 @@ void __init setup_arch(char **cmdline_p)
init_mem_mapping();
idt_setup_early_pf();
/*
* init_mem_mapping() relies on the early IDT page fault handling.
* Now either enable FRED or install the real page fault handler
* for 64-bit in the IDT.
*/
cpu_init_replace_early_idt();
/*
* Update mmu_cr4_features (and, indirectly, trampoline_cr4_features)

View File

@ -246,7 +246,7 @@ static void notrace start_secondary(void *unused)
__flush_tlb_all();
}
cpu_init_exception_handling();
cpu_init_exception_handling(false);
/*
* Load the microcode before reaching the AP alive synchronization

View File

@ -1451,34 +1451,8 @@ DEFINE_IDTENTRY_SW(iret_error)
}
#endif
/* Do not enable FRED by default yet. */
static bool enable_fred __ro_after_init = false;
#ifdef CONFIG_X86_FRED
static int __init fred_setup(char *str)
{
if (!str)
return -EINVAL;
if (!cpu_feature_enabled(X86_FEATURE_FRED))
return 0;
if (!strcmp(str, "on"))
enable_fred = true;
else if (!strcmp(str, "off"))
enable_fred = false;
else
pr_warn("invalid FRED option: 'fred=%s'\n", str);
return 0;
}
early_param("fred", fred_setup);
#endif
void __init trap_init(void)
{
if (cpu_feature_enabled(X86_FEATURE_FRED) && !enable_fred)
setup_clear_cpu_cap(X86_FEATURE_FRED);
/* Init cpu_entry_area before IST entries are set up */
setup_cpu_entry_areas();
@ -1486,7 +1460,7 @@ void __init trap_init(void)
sev_es_init_vc_handling();
/* Initialize TSS before setting up traps so ISTs work */
cpu_init_exception_handling();
cpu_init_exception_handling(true);
/* Setup traps as cpu_init() might #GP */
if (!cpu_feature_enabled(X86_FEATURE_FRED))