Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Ingo Molnar:
 "Various fixes:

   - Fix the PAT performance regression that downgraded write-combining
     device memory regions to uncached.

   - There's been a number of bugs in 32-bit double fault handling -
     hopefully all fixed now.

   - Fix an LDT crash

   - Fix an FPU over-optimization that broke with GCC9 code
     optimizations.

   - Misc cleanups"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm/pat: Fix off-by-one bugs in interval tree search
  x86/ioperm: Save an indentation level in tss_update_io_bitmap()
  x86/fpu: Don't cache access to fpu_fpregs_owner_ctx
  x86/entry/32: Remove unused 'restore_all_notrace' local label
  x86/ptrace: Document FSBASE and GSBASE ABI oddities
  x86/ptrace: Remove set_segment_reg() implementations for current
  x86/traps: die() instead of panicking on a double fault
  x86/doublefault/32: Rewrite the x86_32 #DF handler and unify with 64-bit
  x86/doublefault/32: Move #DF stack and TSS to cpu_entry_area
  x86/doublefault/32: Rename doublefault.c to doublefault_32.c
  x86/traps: Disentangle the 32-bit and 64-bit doublefault code
  lkdtm: Add a DOUBLE_FAULT crash type on x86
  selftests/x86/single_step_syscall: Check SYSENTER directly
  x86/mm/32: Sync only to VMALLOC_END in vmalloc_sync_all()
This commit is contained in: commit e5b3fc125d
@@ -117,7 +117,7 @@ config DEBUG_WX

 config DOUBLEFAULT
         default y
-        bool "Enable doublefault exception handler" if EXPERT
+        bool "Enable doublefault exception handler" if EXPERT && X86_32
         ---help---
           This option allows trapping of rare doublefault exceptions that
           would otherwise cause a system to silently reboot. Disabling this
@@ -1090,7 +1090,6 @@ SYM_FUNC_START(entry_INT80_32)
 restore_all:
         TRACE_IRQS_IRET
         SWITCH_TO_ENTRY_STACK
-.Lrestore_all_notrace:
         CHECK_AND_APPLY_ESPFIX
 .Lrestore_nocheck:
         /* Switch back to user CR3 */
@@ -1537,6 +1536,48 @@ SYM_CODE_START(debug)
         jmp     common_exception
 SYM_CODE_END(debug)

+#ifdef CONFIG_DOUBLEFAULT
+SYM_CODE_START(double_fault)
+1:
+        /*
+         * This is a task gate handler, not an interrupt gate handler.
+         * The error code is on the stack, but the stack is otherwise
+         * empty. Interrupts are off. Our state is sane with the following
+         * exceptions:
+         *
+         * - CR0.TS is set. "TS" literally means "task switched".
+         * - EFLAGS.NT is set because we're a "nested task".
+         * - The doublefault TSS has back_link set and has been marked busy.
+         * - TR points to the doublefault TSS and the normal TSS is busy.
+         * - CR3 is the normal kernel PGD. This would be delightful, except
+         *   that the CPU didn't bother to save the old CR3 anywhere. This
+         *   would make it very awkward to return back to the context we came
+         *   from.
+         *
+         * The rest of EFLAGS is sanitized for us, so we don't need to
+         * worry about AC or DF.
+         *
+         * Don't even bother popping the error code. It's always zero,
+         * and ignoring it makes us a bit more robust against buggy
+         * hypervisor task gate implementations.
+         *
+         * We will manually undo the task switch instead of doing a
+         * task-switching IRET.
+         */
+
+        clts                            /* clear CR0.TS */
+        pushl   $X86_EFLAGS_FIXED
+        popfl                           /* clear EFLAGS.NT */
+
+        call    doublefault_shim
+
+        /* We don't support returning, so we have no IRET here. */
+1:
+        hlt
+        jmp 1b
+SYM_CODE_END(double_fault)
+#endif
+
 /*
  * NMI is doubly nasty. It can happen on the first instruction of
  * entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning
@@ -65,6 +65,13 @@ enum exception_stack_ordering {

 #endif

+#ifdef CONFIG_X86_32
+struct doublefault_stack {
+        unsigned long stack[(PAGE_SIZE - sizeof(struct x86_hw_tss)) / sizeof(unsigned long)];
+        struct x86_hw_tss tss;
+} __aligned(PAGE_SIZE);
+#endif
+
 /*
  * cpu_entry_area is a percpu region that contains things needed by the CPU
  * and early entry/exit code. Real types aren't used for all fields here
@@ -86,6 +93,11 @@ struct cpu_entry_area {
 #endif
         struct entry_stack_page entry_stack_page;

+#ifdef CONFIG_X86_32
+        char guard_doublefault_stack[PAGE_SIZE];
+        struct doublefault_stack doublefault_stack;
+#endif
+
         /*
          * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
          * we need task switches to work, and task switches write to the TSS.
arch/x86/include/asm/doublefault.h (new file, 13 lines)
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_DOUBLEFAULT_H
+#define _ASM_X86_DOUBLEFAULT_H
+
+#if defined(CONFIG_X86_32) && defined(CONFIG_DOUBLEFAULT)
+extern void doublefault_init_cpu_tss(void);
+#else
+static inline void doublefault_init_cpu_tss(void)
+{
+}
+#endif
+
+#endif /* _ASM_X86_DOUBLEFAULT_H */
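The new header uses the usual kernel pattern of pairing a real declaration with an empty inline stub, so a caller such as cpu_init() (further down in this merge) can invoke doublefault_init_cpu_tss() unconditionally and the call compiles away when the feature is configured out. A minimal standalone sketch of that pattern, with illustrative names that are not from the kernel:

    /* feature.h -- illustrative stub pattern only */
    #ifdef CONFIG_FEATURE
    void feature_init(void);                    /* real version built elsewhere */
    #else
    static inline void feature_init(void) { }   /* no-op, no code emitted */
    #endif

    /* caller.c -- no #ifdef needed at the call site */
    void subsystem_init(void)
    {
            feature_init();
    }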
@@ -509,7 +509,7 @@ static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu)

 static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
 {
-        return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
+        return fpu == this_cpu_read(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
 }

 /*
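The one-token change above (this_cpu_read_stable() to this_cpu_read()) matters because fpu_fpregs_owner_ctx can change asynchronously, e.g. from interrupt context via kernel_fpu_begin(), so the compiler must not be allowed to reuse an earlier cached load. A standalone user-space analogy, assuming the signal handler stands in for the interrupt (this is not kernel code):

    #include <signal.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Stands in for fpu_fpregs_owner_ctx; volatile forces a fresh load each read. */
    static volatile sig_atomic_t owner_valid = 1;

    static void on_alarm(int sig)
    {
            (void)sig;
            owner_valid = 0;        /* asynchronous invalidation, like an IRQ */
    }

    int main(void)
    {
            signal(SIGALRM, on_alarm);
            alarm(1);

            /*
             * Each iteration re-reads owner_valid, so the loop observes the
             * asynchronous change.  A "stable" (cacheable) read would let the
             * compiler hoist the load out of the loop and spin forever.
             */
            while (owner_valid)
                    pause();

            puts("ownership lost - state must be reloaded");
            return 0;
    }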
@@ -41,10 +41,11 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
 #endif

 /*
- * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
- * to avoid include recursion hell
+ * This is an upper bound on sizeof(struct cpu_entry_area) / PAGE_SIZE.
+ * Define this here and validate with BUILD_BUG_ON() in cpu_entry_area.c
+ * to avoid include recursion hell.
  */
-#define CPU_ENTRY_AREA_PAGES    (NR_CPUS * 41)
+#define CPU_ENTRY_AREA_PAGES    (NR_CPUS * 43)

 /* The +1 is for the readonly IDT page: */
 #define CPU_ENTRY_AREA_BASE     \
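Why 41 becomes 43: on 32-bit, struct cpu_entry_area grows by the two PAGE_SIZE-sized members added in the cpu_entry_area.h hunk above, so the per-CPU upper bound grows by exactly two pages:

    old upper bound                                   41 pages
    + guard_doublefault_stack                         +1 page
    + struct doublefault_stack (#DF stack + TSS)      +1 page
    -----------------------------------------------------------
    new upper bound                                   43 pages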
@@ -166,7 +166,6 @@ enum cpuid_regs_idx {
 extern struct cpuinfo_x86       boot_cpu_data;
 extern struct cpuinfo_x86       new_cpu_data;

-extern struct x86_hw_tss        doublefault_tss;
 extern __u32                    cpu_caps_cleared[NCAPINTS + NBUGINTS];
 extern __u32                    cpu_caps_set[NCAPINTS + NBUGINTS];

@@ -997,7 +996,6 @@ bool xen_set_default_idle(void);
 #endif

 void stop_this_cpu(void *dummy);
-void df_debug(struct pt_regs *regs, long error_code);
 void microcode_check(void);

 enum l1tf_mitigations {
@@ -69,6 +69,9 @@ dotraplinkage void do_overflow(struct pt_regs *regs, long error_code);
 dotraplinkage void do_bounds(struct pt_regs *regs, long error_code);
 dotraplinkage void do_invalid_op(struct pt_regs *regs, long error_code);
 dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code);
+#if defined(CONFIG_X86_64) || defined(CONFIG_DOUBLEFAULT)
+dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2);
+#endif
 dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *regs, long error_code);
 dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code);
 dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code);
@@ -100,7 +100,9 @@ obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o
 obj-$(CONFIG_CRASH_DUMP)        += crash_dump_$(BITS).o
 obj-y                           += kprobes/
 obj-$(CONFIG_MODULES)           += module.o
-obj-$(CONFIG_DOUBLEFAULT)       += doublefault.o
+ifeq ($(CONFIG_X86_32),y)
+obj-$(CONFIG_DOUBLEFAULT)       += doublefault_32.o
+endif
 obj-$(CONFIG_KGDB)              += kgdb.o
 obj-$(CONFIG_VM86)              += vm86_32.o
 obj-$(CONFIG_EARLY_PRINTK)      += early_printk.o
@@ -24,6 +24,7 @@
 #include <asm/stackprotector.h>
 #include <asm/perf_event.h>
 #include <asm/mmu_context.h>
+#include <asm/doublefault.h>
 #include <asm/archrandom.h>
 #include <asm/hypervisor.h>
 #include <asm/processor.h>
@@ -1814,8 +1815,6 @@ static inline void tss_setup_ist(struct tss_struct *tss)
         tss->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
 }

-static inline void gdt_setup_doublefault_tss(int cpu) { }
-
 #else /* CONFIG_X86_64 */

 static inline void setup_getcpu(int cpu) { }
@@ -1827,13 +1826,6 @@ static inline void ucode_cpu_init(int cpu)

 static inline void tss_setup_ist(struct tss_struct *tss) { }

-static inline void gdt_setup_doublefault_tss(int cpu)
-{
-#ifdef CONFIG_DOUBLEFAULT
-        /* Set up the doublefault TSS pointer in the GDT */
-        __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
-#endif
-}
 #endif /* !CONFIG_X86_64 */

 static inline void tss_setup_io_bitmap(struct tss_struct *tss)
@@ -1923,7 +1915,7 @@ void cpu_init(void)
         clear_all_debug_regs();
         dbg_restore_debug_regs();

-        gdt_setup_doublefault_tss(cpu);
+        doublefault_init_cpu_tss();

         fpu__init_cpu();

arch/x86/kernel/doublefault.c (deleted file, 86 lines)
@@ -1,86 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/sched/debug.h>
-#include <linux/init_task.h>
-#include <linux/fs.h>
-
-#include <linux/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/desc.h>
-
-#ifdef CONFIG_X86_32
-
-#define DOUBLEFAULT_STACKSIZE (1024)
-static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE];
-#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE)
-
-#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM)
-
-static void doublefault_fn(void)
-{
-        struct desc_ptr gdt_desc = {0, 0};
-        unsigned long gdt, tss;
-
-        native_store_gdt(&gdt_desc);
-        gdt = gdt_desc.address;
-
-        printk(KERN_EMERG "PANIC: double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size);
-
-        if (ptr_ok(gdt)) {
-                gdt += GDT_ENTRY_TSS << 3;
-                tss = get_desc_base((struct desc_struct *)gdt);
-                printk(KERN_EMERG "double fault, tss at %08lx\n", tss);
-
-                if (ptr_ok(tss)) {
-                        struct x86_hw_tss *t = (struct x86_hw_tss *)tss;
-
-                        printk(KERN_EMERG "eip = %08lx, esp = %08lx\n",
-                               t->ip, t->sp);
-
-                        printk(KERN_EMERG "eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n",
-                               t->ax, t->bx, t->cx, t->dx);
-                        printk(KERN_EMERG "esi = %08lx, edi = %08lx\n",
-                               t->si, t->di);
-                }
-        }
-
-        for (;;)
-                cpu_relax();
-}
-
-struct x86_hw_tss doublefault_tss __cacheline_aligned = {
-        .sp0            = STACK_START,
-        .ss0            = __KERNEL_DS,
-        .ldt            = 0,
-        .io_bitmap_base = IO_BITMAP_OFFSET_INVALID,
-
-        .ip             = (unsigned long) doublefault_fn,
-        /* 0x2 bit is always set */
-        .flags          = X86_EFLAGS_SF | 0x2,
-        .sp             = STACK_START,
-        .es             = __USER_DS,
-        .cs             = __KERNEL_CS,
-        .ss             = __KERNEL_DS,
-        .ds             = __USER_DS,
-        .fs             = __KERNEL_PERCPU,
-#ifndef CONFIG_X86_32_LAZY_GS
-        .gs             = __KERNEL_STACK_CANARY,
-#endif
-
-        .__cr3          = __pa_nodebug(swapper_pg_dir),
-};
-
-/* dummy for do_double_fault() call */
-void df_debug(struct pt_regs *regs, long error_code) {}
-
-#else /* !CONFIG_X86_32 */
-
-void df_debug(struct pt_regs *regs, long error_code)
-{
-        pr_emerg("PANIC: double fault, error_code: 0x%lx\n", error_code);
-        show_regs(regs);
-        panic("Machine halted.");
-}
-#endif
arch/x86/kernel/doublefault_32.c (new file, 136 lines)
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/init_task.h>
+#include <linux/fs.h>
+
+#include <linux/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+#include <asm/traps.h>
+
+extern void double_fault(void);
+#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM)
+
+#define TSS(x) this_cpu_read(cpu_tss_rw.x86_tss.x)
+
+static void set_df_gdt_entry(unsigned int cpu);
+
+/*
+ * Called by double_fault with CR0.TS and EFLAGS.NT cleared. The CPU thinks
+ * we're running the doublefault task. Cannot return.
+ */
+asmlinkage notrace void __noreturn doublefault_shim(void)
+{
+        unsigned long cr2;
+        struct pt_regs regs;
+
+        BUILD_BUG_ON(sizeof(struct doublefault_stack) != PAGE_SIZE);
+
+        cr2 = native_read_cr2();
+
+        /* Reset back to the normal kernel task. */
+        force_reload_TR();
+        set_df_gdt_entry(smp_processor_id());
+
+        trace_hardirqs_off();
+
+        /*
+         * Fill in pt_regs. A downside of doing this in C is that the unwinder
+         * won't see it (no ENCODE_FRAME_POINTER), so a nested stack dump
+         * won't successfully unwind to the source of the double fault.
+         * The main dump from do_double_fault() is fine, though, since it
+         * uses these regs directly.
+         *
+         * If anyone ever cares, this could be moved to asm.
+         */
+        regs.ss         = TSS(ss);
+        regs.__ssh      = 0;
+        regs.sp         = TSS(sp);
+        regs.flags      = TSS(flags);
+        regs.cs         = TSS(cs);
+        /* We won't go through the entry asm, so we can leave __csh as 0. */
+        regs.__csh      = 0;
+        regs.ip         = TSS(ip);
+        regs.orig_ax    = 0;
+        regs.gs         = TSS(gs);
+        regs.__gsh      = 0;
+        regs.fs         = TSS(fs);
+        regs.__fsh      = 0;
+        regs.es         = TSS(es);
+        regs.__esh      = 0;
+        regs.ds         = TSS(ds);
+        regs.__dsh      = 0;
+        regs.ax         = TSS(ax);
+        regs.bp         = TSS(bp);
+        regs.di         = TSS(di);
+        regs.si         = TSS(si);
+        regs.dx         = TSS(dx);
+        regs.cx         = TSS(cx);
+        regs.bx         = TSS(bx);
+
+        do_double_fault(&regs, 0, cr2);
+
+        /*
+         * x86_32 does not save the original CR3 anywhere on a task switch.
+         * This means that, even if we wanted to return, we would need to find
+         * some way to reconstruct CR3. We could make a credible guess based
+         * on cpu_tlbstate, but that would be racy and would not account for
+         * PTI.
+         *
+         * Instead, don't bother. We can return through
+         * rewind_stack_do_exit() instead.
+         */
+        panic("cannot return from double fault\n");
+}
+NOKPROBE_SYMBOL(doublefault_shim);
+
+DEFINE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack) = {
+        .tss = {
+                /*
+                 * No sp0 or ss0 -- we never run CPL != 0 with this TSS
+                 * active. sp is filled in later.
+                 */
+                .ldt            = 0,
+                .io_bitmap_base = IO_BITMAP_OFFSET_INVALID,
+
+                .ip             = (unsigned long) double_fault,
+                .flags          = X86_EFLAGS_FIXED,
+                .es             = __USER_DS,
+                .cs             = __KERNEL_CS,
+                .ss             = __KERNEL_DS,
+                .ds             = __USER_DS,
+                .fs             = __KERNEL_PERCPU,
+#ifndef CONFIG_X86_32_LAZY_GS
+                .gs             = __KERNEL_STACK_CANARY,
+#endif
+
+                .__cr3          = __pa_nodebug(swapper_pg_dir),
+        },
+};
+
+static void set_df_gdt_entry(unsigned int cpu)
+{
+        /* Set up doublefault TSS pointer in the GDT */
+        __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS,
+                       &get_cpu_entry_area(cpu)->doublefault_stack.tss);

+}
+
+void doublefault_init_cpu_tss(void)
+{
+        unsigned int cpu = smp_processor_id();
+        struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
+
+        /*
+         * The linker isn't smart enough to initialize percpu variables that
+         * point to other places in percpu space.
+         */
+        this_cpu_write(doublefault_stack.tss.sp,
+                       (unsigned long)&cea->doublefault_stack.stack +
+                       sizeof(doublefault_stack.stack));
+
+        set_df_gdt_entry(cpu);
+}
@@ -29,6 +29,9 @@ const char *stack_type_name(enum stack_type type)
         if (type == STACK_TYPE_ENTRY)
                 return "ENTRY_TRAMPOLINE";

+        if (type == STACK_TYPE_EXCEPTION)
+                return "#DF";
+
         return NULL;
 }

@@ -82,6 +85,30 @@ static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
         return true;
 }

+static bool in_doublefault_stack(unsigned long *stack, struct stack_info *info)
+{
+#ifdef CONFIG_DOUBLEFAULT
+        struct cpu_entry_area *cea = get_cpu_entry_area(raw_smp_processor_id());
+        struct doublefault_stack *ss = &cea->doublefault_stack;
+
+        void *begin = ss->stack;
+        void *end = begin + sizeof(ss->stack);
+
+        if ((void *)stack < begin || (void *)stack >= end)
+                return false;
+
+        info->type      = STACK_TYPE_EXCEPTION;
+        info->begin     = begin;
+        info->end       = end;
+        info->next_sp   = (unsigned long *)this_cpu_read(cpu_tss_rw.x86_tss.sp);
+
+        return true;
+#else
+        return false;
+#endif
+}
+
+
 int get_stack_info(unsigned long *stack, struct task_struct *task,
                    struct stack_info *info, unsigned long *visit_mask)
 {
@@ -105,6 +132,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
         if (in_softirq_stack(stack, info))
                 goto recursion_check;

+        if (in_doublefault_stack(stack, info))
+                goto recursion_check;
+
         goto unknown;

 recursion_check:
@@ -377,37 +377,37 @@ static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm)
 void tss_update_io_bitmap(void)
 {
         struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
+        struct thread_struct *t = &current->thread;
         u16 *base = &tss->x86_tss.io_bitmap_base;

-        if (test_thread_flag(TIF_IO_BITMAP)) {
-                struct thread_struct *t = &current->thread;
-
-                if (IS_ENABLED(CONFIG_X86_IOPL_IOPERM) && t->iopl_emul == 3) {
-                        *base = IO_BITMAP_OFFSET_VALID_ALL;
-                } else {
-                        struct io_bitmap *iobm = t->io_bitmap;
-                        /*
-                         * Only copy bitmap data when the sequence number
-                         * differs. The update time is accounted to the
-                         * incoming task.
-                         */
-                        if (tss->io_bitmap.prev_sequence != iobm->sequence)
-                                tss_copy_io_bitmap(tss, iobm);
-
-                        /* Enable the bitmap */
-                        *base = IO_BITMAP_OFFSET_VALID_MAP;
-                }
-                /*
-                 * Make sure that the TSS limit is covering the io bitmap.
-                 * It might have been cut down by a VMEXIT to 0x67 which
-                 * would cause a subsequent I/O access from user space to
-                 * trigger a #GP because tbe bitmap is outside the TSS
-                 * limit.
-                 */
-                refresh_tss_limit();
-        } else {
+        if (!test_thread_flag(TIF_IO_BITMAP)) {
                 tss_invalidate_io_bitmap(tss);
+                return;
         }
+
+        if (IS_ENABLED(CONFIG_X86_IOPL_IOPERM) && t->iopl_emul == 3) {
+                *base = IO_BITMAP_OFFSET_VALID_ALL;
+        } else {
+                struct io_bitmap *iobm = t->io_bitmap;
+
+                /*
+                 * Only copy bitmap data when the sequence number differs. The
+                 * update time is accounted to the incoming task.
+                 */
+                if (tss->io_bitmap.prev_sequence != iobm->sequence)
+                        tss_copy_io_bitmap(tss, iobm);
+
+                /* Enable the bitmap */
+                *base = IO_BITMAP_OFFSET_VALID_MAP;
+        }
+
+        /*
+         * Make sure that the TSS limit is covering the IO bitmap. It might have
+         * been cut down by a VMEXIT to 0x67 which would cause a subsequent I/O
+         * access from user space to trigger a #GP because tbe bitmap is outside
+         * the TSS limit.
+         */
+        refresh_tss_limit();
 }
 #else /* CONFIG_X86_IOPL_IOPERM */
 static inline void switch_to_bitmap(unsigned long tifp) { }
@@ -182,6 +182,9 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
 static int set_segment_reg(struct task_struct *task,
                            unsigned long offset, u16 value)
 {
+        if (WARN_ON_ONCE(task == current))
+                return -EIO;
+
         /*
          * The value argument was already truncated to 16 bits.
          */
@@ -209,10 +212,7 @@ static int set_segment_reg(struct task_struct *task,
                 break;

         case offsetof(struct user_regs_struct, gs):
-                if (task == current)
-                        set_user_gs(task_pt_regs(task), value);
-                else
-                        task_user_gs(task) = value;
+                task_user_gs(task) = value;
         }

         return 0;
@@ -272,32 +272,41 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
 static int set_segment_reg(struct task_struct *task,
                            unsigned long offset, u16 value)
 {
+        if (WARN_ON_ONCE(task == current))
+                return -EIO;
+
         /*
          * The value argument was already truncated to 16 bits.
          */
         if (invalid_selector(value))
                 return -EIO;

+        /*
+         * This function has some ABI oddities.
+         *
+         * A 32-bit ptracer probably expects that writing FS or GS will change
+         * FSBASE or GSBASE respectively. In the absence of FSGSBASE support,
+         * this code indeed has that effect. When FSGSBASE is added, this
+         * will require a special case.
+         *
+         * For existing 64-bit ptracers, writing FS or GS *also* currently
+         * changes the base if the selector is nonzero the next time the task
+         * is run. This behavior may not be needed, and trying to preserve it
+         * when FSGSBASE is added would be complicated at best.
+         */
+
         switch (offset) {
         case offsetof(struct user_regs_struct,fs):
                 task->thread.fsindex = value;
-                if (task == current)
-                        loadsegment(fs, task->thread.fsindex);
                 break;
         case offsetof(struct user_regs_struct,gs):
                 task->thread.gsindex = value;
-                if (task == current)
-                        load_gs_index(task->thread.gsindex);
                 break;
         case offsetof(struct user_regs_struct,ds):
                 task->thread.ds = value;
-                if (task == current)
-                        loadsegment(ds, task->thread.ds);
                 break;
         case offsetof(struct user_regs_struct,es):
                 task->thread.es = value;
-                if (task == current)
-                        loadsegment(es, task->thread.es);
                 break;

         /*
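For readers unfamiliar with the user-space side the new comment describes, the sketch below shows a 64-bit ptracer writing a stopped tracee's FS selector with PTRACE_POKEUSER. It is a hedged illustration only (minimal error handling, selector value chosen arbitrarily) and is not part of the kernel change:

    #include <err.h>
    #include <signal.h>
    #include <stddef.h>
    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <sys/user.h>
    #include <sys/wait.h>
    #include <unistd.h>

    int main(void)
    {
            pid_t child = fork();

            if (child < 0)
                    err(1, "fork");

            if (child == 0) {
                    /* Tracee: stop so the parent can poke registers. */
                    ptrace(PTRACE_TRACEME, 0, NULL, NULL);
                    raise(SIGSTOP);
                    _exit(0);
            }

            waitpid(child, NULL, 0);

            /*
             * Write the FS selector of the stopped tracee.  Per the comment
             * above, the base is only refreshed from the selector the next
             * time the task runs.
             */
            if (ptrace(PTRACE_POKEUSER, child,
                       (void *)offsetof(struct user_regs_struct, fs),
                       (void *)0UL) == -1)
                    err(1, "PTRACE_POKEUSER");

            ptrace(PTRACE_CONT, child, NULL, NULL);
            waitpid(child, NULL, 0);
            return 0;
    }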
@@ -375,6 +384,9 @@ static int putreg(struct task_struct *child,
                  * When changing the FS base, use do_arch_prctl_64()
                  * to set the index to zero and to set the base
                  * as requested.
+                 *
+                 * NB: This behavior is nonsensical and likely needs to
+                 * change when FSGSBASE support is added.
                  */
                 if (child->thread.fsbase != value)
                         return do_arch_prctl_64(child, ARCH_SET_FS, value);
@@ -306,8 +306,23 @@ __visible void __noreturn handle_stack_overflow(const char *message,
 }
 #endif

-#ifdef CONFIG_X86_64
-/* Runs on IST stack */
+#if defined(CONFIG_X86_64) || defined(CONFIG_DOUBLEFAULT)
+/*
+ * Runs on an IST stack for x86_64 and on a special task stack for x86_32.
+ *
+ * On x86_64, this is more or less a normal kernel entry. Notwithstanding the
+ * SDM's warnings about double faults being unrecoverable, returning works as
+ * expected. Presumably what the SDM actually means is that the CPU may get
+ * the register state wrong on entry, so returning could be a bad idea.
+ *
+ * Various CPU engineers have promised that double faults due to an IRET fault
+ * while the stack is read-only are, in fact, recoverable.
+ *
+ * On x86_32, this is entered through a task gate, and regs are synthesized
+ * from the TSS. Returning is, in principle, okay, but changes to regs will
+ * be lost. If, for some reason, we need to return to a context with modified
+ * regs, the shim code could be adjusted to synchronize the registers.
+ */
 dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2)
 {
         static const char str[] = "double fault";
@@ -411,15 +426,9 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign
                 handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2);
 #endif

-#ifdef CONFIG_DOUBLEFAULT
-        df_debug(regs, error_code);
-#endif
-        /*
-         * This is always a kernel trap and never fixable (and thus must
-         * never return).
-         */
-        for (;;)
-                die(str, regs, error_code);
+        pr_emerg("PANIC: double fault, error_code: 0x%lx\n", error_code);
+        die("double fault", regs, error_code);
+        panic("Machine halted.");
 }
 #endif

@@ -17,6 +17,10 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
 DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
 #endif

+#if defined(CONFIG_X86_32) && defined(CONFIG_DOUBLEFAULT)
+DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack);
+#endif
+
 struct cpu_entry_area *get_cpu_entry_area(int cpu)
 {
         unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
@@ -108,7 +112,15 @@ static void __init percpu_setup_exception_stacks(unsigned int cpu)
         cea_map_stack(MCE);
 }
 #else
-static inline void percpu_setup_exception_stacks(unsigned int cpu) {}
+static inline void percpu_setup_exception_stacks(unsigned int cpu)
+{
+#ifdef CONFIG_DOUBLEFAULT
+        struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
+
+        cea_map_percpu_pages(&cea->doublefault_stack,
+                             &per_cpu(doublefault_stack, cpu), 1, PAGE_KERNEL);
+#endif
+}
 #endif

 /* Setup the fixmap mappings only once per-processor */
@@ -197,7 +197,7 @@ void vmalloc_sync_all(void)
                 return;

         for (address = VMALLOC_START & PMD_MASK;
-             address >= TASK_SIZE_MAX && address < FIXADDR_TOP;
+             address >= TASK_SIZE_MAX && address < VMALLOC_END;
              address += PMD_SIZE) {
                 struct page *page;

@@ -56,7 +56,7 @@ static struct memtype *memtype_match(u64 start, u64 end, int match_type)
 {
         struct memtype *match;

-        match = memtype_interval_iter_first(&memtype_rbroot, start, end);
+        match = memtype_interval_iter_first(&memtype_rbroot, start, end-1);
         while (match != NULL && match->start < end) {
                 if ((match_type == MEMTYPE_EXACT_MATCH) &&
                     (match->start == start) && (match->end == end))
@@ -66,7 +66,7 @@ static struct memtype *memtype_match(u64 start, u64 end, int match_type)
                     (match->start < start) && (match->end == end))
                         return match;

-                match = memtype_interval_iter_next(match, start, end);
+                match = memtype_interval_iter_next(match, start, end-1);
         }

         return NULL;    /* Returns NULL if there is no match */
@@ -79,7 +79,7 @@ static int memtype_check_conflict(u64 start, u64 end,
         struct memtype *match;
         enum page_cache_mode found_type = reqtype;

-        match = memtype_interval_iter_first(&memtype_rbroot, start, end);
+        match = memtype_interval_iter_first(&memtype_rbroot, start, end-1);
         if (match == NULL)
                 goto success;

@@ -89,12 +89,12 @@ static int memtype_check_conflict(u64 start, u64 end,
         dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end);
         found_type = match->type;

-        match = memtype_interval_iter_next(match, start, end);
+        match = memtype_interval_iter_next(match, start, end-1);
         while (match) {
                 if (match->type != found_type)
                         goto failure;

-                match = memtype_interval_iter_next(match, start, end);
+                match = memtype_interval_iter_next(match, start, end-1);
         }
 success:
         if (newtype)
@@ -160,7 +160,7 @@ struct memtype *memtype_erase(u64 start, u64 end)
 struct memtype *memtype_lookup(u64 addr)
 {
         return memtype_interval_iter_first(&memtype_rbroot, addr,
-                                           addr + PAGE_SIZE);
+                                           addr + PAGE_SIZE-1);
 }

 #if defined(CONFIG_DEBUG_FS)
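The repeated end to end-1 change above is the whole fix: the interval-tree iterators take a closed range [first, last], while a memtype region [start, end) is half-open, so querying with end as the last value also reports regions that merely begin at end. A standalone sketch of the boundary case (addresses made up for illustration; the existing region [0x2000, 0x3000) is represented here by its closed form):

    #include <stdbool.h>
    #include <stdio.h>

    /* Closed-interval overlap test, as interval-tree iterators effectively use. */
    static bool overlaps_closed(unsigned long a_first, unsigned long a_last,
                                unsigned long b_first, unsigned long b_last)
    {
            return a_first <= b_last && b_first <= a_last;
    }

    int main(void)
    {
            /* Existing region [0x2000, 0x3000), closed form [0x2000, 0x2fff]. */
            unsigned long reg_first = 0x2000, reg_last = 0x2fff;

            /* Query for [0x1000, 0x2000): must NOT report a conflict. */
            unsigned long start = 0x1000, end = 0x2000;

            printf("query with end:    %s\n",
                   overlaps_closed(start, end, reg_first, reg_last) ?
                   "overlap (wrong)" : "no overlap");
            printf("query with end-1:  %s\n",
                   overlaps_closed(start, end - 1, reg_first, reg_last) ?
                   "overlap (wrong)" : "no overlap");
            return 0;
    }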
@@ -12,6 +12,10 @@
 #include <linux/sched/task_stack.h>
 #include <linux/uaccess.h>

+#ifdef CONFIG_X86_32
+#include <asm/desc.h>
+#endif
+
 struct lkdtm_list {
         struct list_head node;
 };
@@ -337,3 +341,38 @@ void lkdtm_UNSET_SMEP(void)
         pr_err("FAIL: this test is x86_64-only\n");
 #endif
 }
+
+#ifdef CONFIG_X86_32
+void lkdtm_DOUBLE_FAULT(void)
+{
+        /*
+         * Trigger #DF by setting the stack limit to zero. This clobbers
+         * a GDT TLS slot, which is okay because the current task will die
+         * anyway due to the double fault.
+         */
+        struct desc_struct d = {
+                .type = 3,      /* expand-up, writable, accessed data */
+                .p = 1,         /* present */
+                .d = 1,         /* 32-bit */
+                .g = 0,         /* limit in bytes */
+                .s = 1,         /* not system */
+        };
+
+        local_irq_disable();
+        write_gdt_entry(get_cpu_gdt_rw(smp_processor_id()),
+                        GDT_ENTRY_TLS_MIN, &d, DESCTYPE_S);
+
+        /*
+         * Put our zero-limit segment in SS and then trigger a fault. The
+         * 4-byte access to (%esp) will fault with #SS, and the attempt to
+         * deliver the fault will recursively cause #SS and result in #DF.
+         * This whole process happens while NMIs and MCEs are blocked by the
+         * MOV SS window. This is nice because an NMI with an invalid SS
+         * would also double-fault, resulting in the NMI or MCE being lost.
+         */
+        asm volatile ("movw %0, %%ss; addl $0, (%%esp)" ::
+                      "r" ((unsigned short)(GDT_ENTRY_TLS_MIN << 3)));
+
+        panic("tried to double fault but didn't die\n");
+}
+#endif
|
@ -171,6 +171,9 @@ static const struct crashtype crashtypes[] = {
|
|||||||
CRASHTYPE(USERCOPY_KERNEL_DS),
|
CRASHTYPE(USERCOPY_KERNEL_DS),
|
||||||
CRASHTYPE(STACKLEAK_ERASING),
|
CRASHTYPE(STACKLEAK_ERASING),
|
||||||
CRASHTYPE(CFI_FORWARD_PROTO),
|
CRASHTYPE(CFI_FORWARD_PROTO),
|
||||||
|
#ifdef CONFIG_X86_32
|
||||||
|
CRASHTYPE(DOUBLE_FAULT),
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@@ -28,6 +28,9 @@ void lkdtm_CORRUPT_USER_DS(void);
 void lkdtm_STACK_GUARD_PAGE_LEADING(void);
 void lkdtm_STACK_GUARD_PAGE_TRAILING(void);
 void lkdtm_UNSET_SMEP(void);
+#ifdef CONFIG_X86_32
+void lkdtm_DOUBLE_FAULT(void);
+#endif

 /* lkdtm_heap.c */
 void __init lkdtm_heap_init(void);
@@ -43,7 +43,19 @@ static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
                 err(1, "sigaction");
 }

-static volatile sig_atomic_t sig_traps;
+static void clearhandler(int sig)
+{
+        struct sigaction sa;
+        memset(&sa, 0, sizeof(sa));
+        sa.sa_handler = SIG_DFL;
+        sigemptyset(&sa.sa_mask);
+        if (sigaction(sig, &sa, 0))
+                err(1, "sigaction");
+}
+
+static volatile sig_atomic_t sig_traps, sig_eflags;
+sigjmp_buf jmpbuf;
+static unsigned char altstack_data[SIGSTKSZ];

 #ifdef __x86_64__
 # define REG_IP REG_RIP
@@ -90,6 +102,25 @@ static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
         }
 }

+static char const * const signames[] = {
+        [SIGSEGV] = "SIGSEGV",
+        [SIGBUS] = "SIBGUS",
+        [SIGTRAP] = "SIGTRAP",
+        [SIGILL] = "SIGILL",
+};
+
+static void print_and_longjmp(int sig, siginfo_t *si, void *ctx_void)
+{
+        ucontext_t *ctx = ctx_void;
+
+        printf("\tGot %s with RIP=%lx, TF=%ld\n", signames[sig],
+               (unsigned long)ctx->uc_mcontext.gregs[REG_IP],
+               (unsigned long)ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_TF);
+
+        sig_eflags = (unsigned long)ctx->uc_mcontext.gregs[REG_EFL];
+        siglongjmp(jmpbuf, 1);
+}
+
 static void check_result(void)
 {
         unsigned long new_eflags = get_eflags();
@@ -109,6 +140,22 @@ static void check_result(void)
         sig_traps = 0;
 }

+static void fast_syscall_no_tf(void)
+{
+        sig_traps = 0;
+        printf("[RUN]\tFast syscall with TF cleared\n");
+        fflush(stdout); /* Force a syscall */
+        if (get_eflags() & X86_EFLAGS_TF) {
+                printf("[FAIL]\tTF is now set\n");
+                exit(1);
+        }
+        if (sig_traps) {
+                printf("[FAIL]\tGot SIGTRAP\n");
+                exit(1);
+        }
+        printf("[OK]\tNothing unexpected happened\n");
+}
+
 int main()
 {
 #ifdef CAN_BUILD_32
@@ -163,17 +210,46 @@ int main()
         check_result();

         /* Now make sure that another fast syscall doesn't set TF again. */
-        printf("[RUN]\tFast syscall with TF cleared\n");
-        fflush(stdout); /* Force a syscall */
-        if (get_eflags() & X86_EFLAGS_TF) {
-                printf("[FAIL]\tTF is now set\n");
+        fast_syscall_no_tf();
+
+        /*
+         * And do a forced SYSENTER to make sure that this works even if
+         * fast syscalls don't use SYSENTER.
+         *
+         * Invoking SYSENTER directly breaks all the rules. Just handle
+         * the SIGSEGV.
+         */
+        if (sigsetjmp(jmpbuf, 1) == 0) {
+                unsigned long nr = SYS_getpid;
+                printf("[RUN]\tSet TF and check SYSENTER\n");
+                stack_t stack = {
+                        .ss_sp = altstack_data,
+                        .ss_size = SIGSTKSZ,
+                };
+                if (sigaltstack(&stack, NULL) != 0)
+                        err(1, "sigaltstack");
+                sethandler(SIGSEGV, print_and_longjmp,
+                           SA_RESETHAND | SA_ONSTACK);
+                sethandler(SIGILL, print_and_longjmp, SA_RESETHAND);
+                set_eflags(get_eflags() | X86_EFLAGS_TF);
+                /* Clear EBP first to make sure we segfault cleanly. */
+                asm volatile ("xorl %%ebp, %%ebp; SYSENTER" : "+a" (nr) :: "flags", "rcx"
+#ifdef __x86_64__
+                        , "r11"
+#endif
+                );
+
+                /* We're unreachable here. SYSENTER forgets RIP. */
+        }
+        clearhandler(SIGSEGV);
+        clearhandler(SIGILL);
+        if (!(sig_eflags & X86_EFLAGS_TF)) {
+                printf("[FAIL]\tTF was cleared\n");
                 exit(1);
         }
-        if (sig_traps) {
-                printf("[FAIL]\tGot SIGTRAP\n");
-                exit(1);
-        }
-        printf("[OK]\tNothing unexpected happened\n");
+
+        /* Now make sure that another fast syscall doesn't set TF again. */
+        fast_syscall_no_tf();

         return 0;
 }