Merge branch 'x86-irq-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 irq updates from Ingo Molnar: "Here are the main changes in this tree: - Introduce x86-64 IRQ/exception/debug stack guard pages to detect stack overflows immediately and deterministically. - Clean up over a decade's worth of accumulated cruft. The outcome of this should be more clear-cut faults/crashes when any of the low-level x86 CPU stacks overflow, instead of silent memory corruption and sporadic failures much later on" * 'x86-irq-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (33 commits) x86/irq: Fix outdated comments x86/irq/64: Remove stack overflow debug code x86/irq/64: Remap the IRQ stack with guard pages x86/irq/64: Split the IRQ stack into its own pages x86/irq/64: Init hardirq_stack_ptr during CPU hotplug x86/irq/32: Handle irq stack allocation failure proper x86/irq/32: Invoke irq_ctx_init() from init_IRQ() x86/irq/64: Rename irq_stack_ptr to hardirq_stack_ptr x86/irq/32: Rename hard/softirq_stack to hard/softirq_stack_ptr x86/irq/32: Make irq stack a character array x86/irq/32: Define IRQ_STACK_SIZE x86/dumpstack/64: Speedup in_exception_stack() x86/exceptions: Split debug IST stack x86/exceptions: Enable IST guard pages x86/exceptions: Disconnect IST index and stack order x86/cpu: Remove orig_ist array x86/cpu: Prepare TSS.IST setup for guard pages x86/dumpstack/64: Use cpu_entry_area instead of orig_ist x86/irq/64: Use cpu entry area instead of orig_ist x86/traps: Use cpu_entry_area instead of orig_ist ...
This commit is contained in:
commit
8f14772703
@ -59,7 +59,7 @@ If that assumption is ever broken then the stacks will become corrupt.
|
||||
|
||||
The currently assigned IST stacks are :-
|
||||
|
||||
* DOUBLEFAULT_STACK. EXCEPTION_STKSZ (PAGE_SIZE).
|
||||
* ESTACK_DF. EXCEPTION_STKSZ (PAGE_SIZE).
|
||||
|
||||
Used for interrupt 8 - Double Fault Exception (#DF).
|
||||
|
||||
@ -68,7 +68,7 @@ The currently assigned IST stacks are :-
|
||||
Using a separate stack allows the kernel to recover from it well enough
|
||||
in many cases to still output an oops.
|
||||
|
||||
* NMI_STACK. EXCEPTION_STKSZ (PAGE_SIZE).
|
||||
* ESTACK_NMI. EXCEPTION_STKSZ (PAGE_SIZE).
|
||||
|
||||
Used for non-maskable interrupts (NMI).
|
||||
|
||||
@ -76,7 +76,7 @@ The currently assigned IST stacks are :-
|
||||
middle of switching stacks. Using IST for NMI events avoids making
|
||||
assumptions about the previous state of the kernel stack.
|
||||
|
||||
* DEBUG_STACK. DEBUG_STKSZ
|
||||
* ESTACK_DB. EXCEPTION_STKSZ (PAGE_SIZE).
|
||||
|
||||
Used for hardware debug interrupts (interrupt 1) and for software
|
||||
debug interrupts (INT3).
|
||||
@ -86,7 +86,12 @@ The currently assigned IST stacks are :-
|
||||
avoids making assumptions about the previous state of the kernel
|
||||
stack.
|
||||
|
||||
* MCE_STACK. EXCEPTION_STKSZ (PAGE_SIZE).
|
||||
To handle nested #DB correctly there exist two instances of DB stacks. On
|
||||
#DB entry the IST stack pointer for #DB is switched to the second instance
|
||||
so a nested #DB starts from a clean stack. The nested #DB switches
|
||||
the IST stack pointer to a guard hole to catch triple nesting.
|
||||
|
||||
* ESTACK_MCE. EXCEPTION_STKSZ (PAGE_SIZE).
|
||||
|
||||
Used for interrupt 18 - Machine Check Exception (#MC).
|
||||
|
||||
|
@ -14,6 +14,7 @@ config X86_32
|
||||
select ARCH_WANT_IPC_PARSE_VERSION
|
||||
select CLKSRC_I8253
|
||||
select CLONE_BACKWARDS
|
||||
select HAVE_DEBUG_STACKOVERFLOW
|
||||
select MODULES_USE_ELF_REL
|
||||
select OLD_SIGACTION
|
||||
|
||||
@ -138,7 +139,6 @@ config X86
|
||||
select HAVE_COPY_THREAD_TLS
|
||||
select HAVE_C_RECORDMCOUNT
|
||||
select HAVE_DEBUG_KMEMLEAK
|
||||
select HAVE_DEBUG_STACKOVERFLOW
|
||||
select HAVE_DMA_CONTIGUOUS
|
||||
select HAVE_DYNAMIC_FTRACE
|
||||
select HAVE_DYNAMIC_FTRACE_WITH_REGS
|
||||
|
@ -298,7 +298,7 @@ ENTRY(__switch_to_asm)
|
||||
|
||||
#ifdef CONFIG_STACKPROTECTOR
|
||||
movq TASK_stack_canary(%rsi), %rbx
|
||||
movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
|
||||
movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_RETPOLINE
|
||||
@ -430,8 +430,8 @@ END(irq_entries_start)
|
||||
* it before we actually move ourselves to the IRQ stack.
|
||||
*/
|
||||
|
||||
movq \old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8)
|
||||
movq PER_CPU_VAR(irq_stack_ptr), %rsp
|
||||
movq \old_rsp, PER_CPU_VAR(irq_stack_backing_store + IRQ_STACK_SIZE - 8)
|
||||
movq PER_CPU_VAR(hardirq_stack_ptr), %rsp
|
||||
|
||||
#ifdef CONFIG_DEBUG_ENTRY
|
||||
/*
|
||||
@ -840,7 +840,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
|
||||
/*
|
||||
* Exception entry points.
|
||||
*/
|
||||
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
|
||||
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8)
|
||||
|
||||
/**
|
||||
* idtentry - Generate an IDT entry stub
|
||||
@ -878,7 +878,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
|
||||
* @paranoid == 2 is special: the stub will never switch stacks. This is for
|
||||
* #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
|
||||
*/
|
||||
.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
|
||||
.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0
|
||||
ENTRY(\sym)
|
||||
UNWIND_HINT_IRET_REGS offset=\has_error_code*8
|
||||
|
||||
@ -924,13 +924,13 @@ ENTRY(\sym)
|
||||
.endif
|
||||
|
||||
.if \shift_ist != -1
|
||||
subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
|
||||
subq $\ist_offset, CPU_TSS_IST(\shift_ist)
|
||||
.endif
|
||||
|
||||
call \do_sym
|
||||
|
||||
.if \shift_ist != -1
|
||||
addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
|
||||
addq $\ist_offset, CPU_TSS_IST(\shift_ist)
|
||||
.endif
|
||||
|
||||
/* these procedures expect "no swapgs" flag in ebx */
|
||||
@ -1128,7 +1128,7 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \
|
||||
hv_stimer0_callback_vector hv_stimer0_vector_handler
|
||||
#endif /* CONFIG_HYPERV */
|
||||
|
||||
idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
|
||||
idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=IST_INDEX_DB ist_offset=DB_STACK_OFFSET
|
||||
idtentry int3 do_int3 has_error_code=0
|
||||
idtentry stack_segment do_stack_segment has_error_code=1
|
||||
|
||||
|
@ -7,6 +7,64 @@
|
||||
#include <asm/processor.h>
|
||||
#include <asm/intel_ds.h>
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
/* Macro to enforce the same ordering and stack sizes */
|
||||
#define ESTACKS_MEMBERS(guardsize, db2_holesize)\
|
||||
char DF_stack_guard[guardsize]; \
|
||||
char DF_stack[EXCEPTION_STKSZ]; \
|
||||
char NMI_stack_guard[guardsize]; \
|
||||
char NMI_stack[EXCEPTION_STKSZ]; \
|
||||
char DB2_stack_guard[guardsize]; \
|
||||
char DB2_stack[db2_holesize]; \
|
||||
char DB1_stack_guard[guardsize]; \
|
||||
char DB1_stack[EXCEPTION_STKSZ]; \
|
||||
char DB_stack_guard[guardsize]; \
|
||||
char DB_stack[EXCEPTION_STKSZ]; \
|
||||
char MCE_stack_guard[guardsize]; \
|
||||
char MCE_stack[EXCEPTION_STKSZ]; \
|
||||
char IST_top_guard[guardsize]; \
|
||||
|
||||
/* The exception stacks' physical storage. No guard pages required */
|
||||
struct exception_stacks {
|
||||
ESTACKS_MEMBERS(0, 0)
|
||||
};
|
||||
|
||||
/* The effective cpu entry area mapping with guard pages. */
|
||||
struct cea_exception_stacks {
|
||||
ESTACKS_MEMBERS(PAGE_SIZE, EXCEPTION_STKSZ)
|
||||
};
|
||||
|
||||
/*
|
||||
* The exception stack ordering in [cea_]exception_stacks
|
||||
*/
|
||||
enum exception_stack_ordering {
|
||||
ESTACK_DF,
|
||||
ESTACK_NMI,
|
||||
ESTACK_DB2,
|
||||
ESTACK_DB1,
|
||||
ESTACK_DB,
|
||||
ESTACK_MCE,
|
||||
N_EXCEPTION_STACKS
|
||||
};
|
||||
|
||||
#define CEA_ESTACK_SIZE(st) \
|
||||
sizeof(((struct cea_exception_stacks *)0)->st## _stack)
|
||||
|
||||
#define CEA_ESTACK_BOT(ceastp, st) \
|
||||
((unsigned long)&(ceastp)->st## _stack)
|
||||
|
||||
#define CEA_ESTACK_TOP(ceastp, st) \
|
||||
(CEA_ESTACK_BOT(ceastp, st) + CEA_ESTACK_SIZE(st))
|
||||
|
||||
#define CEA_ESTACK_OFFS(st) \
|
||||
offsetof(struct cea_exception_stacks, st## _stack)
|
||||
|
||||
#define CEA_ESTACK_PAGES \
|
||||
(sizeof(struct cea_exception_stacks) / PAGE_SIZE)
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* cpu_entry_area is a percpu region that contains things needed by the CPU
|
||||
* and early entry/exit code. Real types aren't used for all fields here
|
||||
@ -32,12 +90,9 @@ struct cpu_entry_area {
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* Exception stacks used for IST entries.
|
||||
*
|
||||
* In the future, this should have a separate slot for each stack
|
||||
* with guard pages between them.
|
||||
* Exception stacks used for IST entries with guard pages.
|
||||
*/
|
||||
char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
|
||||
struct cea_exception_stacks estacks;
|
||||
#endif
|
||||
#ifdef CONFIG_CPU_SUP_INTEL
|
||||
/*
|
||||
@ -57,6 +112,7 @@ struct cpu_entry_area {
|
||||
#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
|
||||
|
||||
DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
|
||||
DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
|
||||
|
||||
extern void setup_cpu_entry_areas(void);
|
||||
extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
|
||||
@ -76,4 +132,7 @@ static inline struct entry_stack *cpu_entry_stack(int cpu)
|
||||
return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
|
||||
}
|
||||
|
||||
#define __this_cpu_ist_top_va(name) \
|
||||
CEA_ESTACK_TOP(__this_cpu_read(cea_exception_stacks), name)
|
||||
|
||||
#endif
|
||||
|
@ -104,11 +104,9 @@ static inline void debug_stack_usage_dec(void)
|
||||
{
|
||||
__this_cpu_dec(debug_stack_usage);
|
||||
}
|
||||
int is_debug_stack(unsigned long addr);
|
||||
void debug_stack_set_zero(void);
|
||||
void debug_stack_reset(void);
|
||||
#else /* !X86_64 */
|
||||
static inline int is_debug_stack(unsigned long addr) { return 0; }
|
||||
static inline void debug_stack_set_zero(void) { }
|
||||
static inline void debug_stack_reset(void) { }
|
||||
static inline void debug_stack_usage_inc(void) { }
|
||||
|
@ -16,11 +16,7 @@ static inline int irq_canonicalize(int irq)
|
||||
return ((irq == 2) ? 9 : irq);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
extern void irq_ctx_init(int cpu);
|
||||
#else
|
||||
# define irq_ctx_init(cpu) do { } while (0)
|
||||
#endif
|
||||
extern int irq_init_percpu_irqstack(unsigned int cpu);
|
||||
|
||||
#define __ARCH_HAS_DO_SOFTIRQ
|
||||
|
||||
|
@ -18,8 +18,8 @@
|
||||
* Vectors 0 ... 31 : system traps and exceptions - hardcoded events
|
||||
* Vectors 32 ... 127 : device interrupts
|
||||
* Vector 128 : legacy int80 syscall interface
|
||||
* Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts
|
||||
* Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts
|
||||
* Vectors 129 ... LOCAL_TIMER_VECTOR-1
|
||||
* Vectors LOCAL_TIMER_VECTOR ... 255 : special interrupts
|
||||
*
|
||||
* 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table.
|
||||
*
|
||||
|
@ -22,11 +22,9 @@
|
||||
#define THREAD_SIZE_ORDER 1
|
||||
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
|
||||
|
||||
#define DOUBLEFAULT_STACK 1
|
||||
#define NMI_STACK 0
|
||||
#define DEBUG_STACK 0
|
||||
#define MCE_STACK 0
|
||||
#define N_EXCEPTION_STACKS 1
|
||||
#define IRQ_STACK_SIZE THREAD_SIZE
|
||||
|
||||
#define N_EXCEPTION_STACKS 1
|
||||
|
||||
#ifdef CONFIG_X86_PAE
|
||||
/*
|
||||
|
@ -14,22 +14,20 @@
|
||||
|
||||
#define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER)
|
||||
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
|
||||
#define CURRENT_MASK (~(THREAD_SIZE - 1))
|
||||
|
||||
#define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER)
|
||||
#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
|
||||
|
||||
#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1)
|
||||
#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
|
||||
|
||||
#define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER)
|
||||
#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
|
||||
|
||||
#define DOUBLEFAULT_STACK 1
|
||||
#define NMI_STACK 2
|
||||
#define DEBUG_STACK 3
|
||||
#define MCE_STACK 4
|
||||
#define N_EXCEPTION_STACKS 4 /* hw limit: 7 */
|
||||
/*
|
||||
* The index for the tss.ist[] array. The hardware limit is 7 entries.
|
||||
*/
|
||||
#define IST_INDEX_DF 0
|
||||
#define IST_INDEX_NMI 1
|
||||
#define IST_INDEX_DB 2
|
||||
#define IST_INDEX_MCE 3
|
||||
|
||||
/*
|
||||
* Set __PAGE_OFFSET to the most negative possible address +
|
||||
|
@ -367,6 +367,13 @@ DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
|
||||
#define __KERNEL_TSS_LIMIT \
|
||||
(IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
|
||||
|
||||
/* Per CPU interrupt stacks */
|
||||
struct irq_stack {
|
||||
char stack[IRQ_STACK_SIZE];
|
||||
} __aligned(IRQ_STACK_SIZE);
|
||||
|
||||
DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
|
||||
#else
|
||||
@ -374,38 +381,25 @@ DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
|
||||
#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Save the original ist values for checking stack pointers during debugging
|
||||
*/
|
||||
struct orig_ist {
|
||||
unsigned long ist[7];
|
||||
};
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
DECLARE_PER_CPU(struct orig_ist, orig_ist);
|
||||
|
||||
union irq_stack_union {
|
||||
char irq_stack[IRQ_STACK_SIZE];
|
||||
struct fixed_percpu_data {
|
||||
/*
|
||||
* GCC hardcodes the stack canary as %gs:40. Since the
|
||||
* irq_stack is the object at %gs:0, we reserve the bottom
|
||||
* 48 bytes of the irq stack for the canary.
|
||||
*/
|
||||
struct {
|
||||
char gs_base[40];
|
||||
unsigned long stack_canary;
|
||||
};
|
||||
char gs_base[40];
|
||||
unsigned long stack_canary;
|
||||
};
|
||||
|
||||
DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible;
|
||||
DECLARE_INIT_PER_CPU(irq_stack_union);
|
||||
DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible;
|
||||
DECLARE_INIT_PER_CPU(fixed_percpu_data);
|
||||
|
||||
static inline unsigned long cpu_kernelmode_gs_base(int cpu)
|
||||
{
|
||||
return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu);
|
||||
return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
|
||||
}
|
||||
|
||||
DECLARE_PER_CPU(char *, irq_stack_ptr);
|
||||
DECLARE_PER_CPU(unsigned int, irq_count);
|
||||
extern asmlinkage void ignore_sysret(void);
|
||||
|
||||
@ -427,15 +421,8 @@ struct stack_canary {
|
||||
};
|
||||
DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
|
||||
#endif
|
||||
/*
|
||||
* per-CPU IRQ handling stacks
|
||||
*/
|
||||
struct irq_stack {
|
||||
u32 stack[THREAD_SIZE/sizeof(u32)];
|
||||
} __aligned(THREAD_SIZE);
|
||||
|
||||
DECLARE_PER_CPU(struct irq_stack *, hardirq_stack);
|
||||
DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
|
||||
/* Per CPU softirq stack pointer */
|
||||
DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
|
||||
#endif /* X86_64 */
|
||||
|
||||
extern unsigned int fpu_kernel_xstate_size;
|
||||
|
@ -131,7 +131,7 @@ void native_smp_prepare_boot_cpu(void);
|
||||
void native_smp_prepare_cpus(unsigned int max_cpus);
|
||||
void calculate_max_logical_packages(void);
|
||||
void native_smp_cpus_done(unsigned int max_cpus);
|
||||
void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
|
||||
int common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
|
||||
int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
|
||||
int native_cpu_disable(void);
|
||||
int common_cpu_die(unsigned int cpu);
|
||||
|
@ -13,7 +13,7 @@
|
||||
* On x86_64, %gs is shared by percpu area and stack canary. All
|
||||
* percpu symbols are zero based and %gs points to the base of percpu
|
||||
* area. The first occupant of the percpu area is always
|
||||
* irq_stack_union which contains stack_canary at offset 40. Userland
|
||||
* fixed_percpu_data which contains stack_canary at offset 40. Userland
|
||||
* %gs is always saved and restored on kernel entry and exit using
|
||||
* swapgs, so stack protector doesn't add any complexity there.
|
||||
*
|
||||
@ -64,7 +64,7 @@ static __always_inline void boot_init_stack_canary(void)
|
||||
u64 tsc;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
|
||||
BUILD_BUG_ON(offsetof(struct fixed_percpu_data, stack_canary) != 40);
|
||||
#endif
|
||||
/*
|
||||
* We both use the random pool and the current TSC as a source
|
||||
@ -79,7 +79,7 @@ static __always_inline void boot_init_stack_canary(void)
|
||||
|
||||
current->stack_canary = canary;
|
||||
#ifdef CONFIG_X86_64
|
||||
this_cpu_write(irq_stack_union.stack_canary, canary);
|
||||
this_cpu_write(fixed_percpu_data.stack_canary, canary);
|
||||
#else
|
||||
this_cpu_write(stack_canary.canary, canary);
|
||||
#endif
|
||||
|
@ -9,6 +9,8 @@
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/ptrace.h>
|
||||
|
||||
#include <asm/cpu_entry_area.h>
|
||||
#include <asm/switch_to.h>
|
||||
|
||||
enum stack_type {
|
||||
|
@ -68,10 +68,12 @@ int main(void)
|
||||
#undef ENTRY
|
||||
|
||||
OFFSET(TSS_ist, tss_struct, x86_tss.ist);
|
||||
DEFINE(DB_STACK_OFFSET, offsetof(struct cea_exception_stacks, DB_stack) -
|
||||
offsetof(struct cea_exception_stacks, DB1_stack));
|
||||
BLANK();
|
||||
|
||||
#ifdef CONFIG_STACKPROTECTOR
|
||||
DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary));
|
||||
DEFINE(stack_canary_offset, offsetof(struct fixed_percpu_data, stack_canary));
|
||||
BLANK();
|
||||
#endif
|
||||
|
||||
|
@ -507,19 +507,6 @@ void load_percpu_segment(int cpu)
|
||||
DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* Special IST stacks which the CPU switches to when it calls
|
||||
* an IST-marked descriptor entry. Up to 7 stacks (hardware
|
||||
* limit), all of them are 4K, except the debug stack which
|
||||
* is 8K.
|
||||
*/
|
||||
static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
|
||||
[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
|
||||
[DEBUG_STACK - 1] = DEBUG_STKSZ
|
||||
};
|
||||
#endif
|
||||
|
||||
/* Load the original GDT from the per-cpu structure */
|
||||
void load_direct_gdt(int cpu)
|
||||
{
|
||||
@ -1511,9 +1498,9 @@ static __init int setup_clearcpuid(char *arg)
|
||||
__setup("clearcpuid=", setup_clearcpuid);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
DEFINE_PER_CPU_FIRST(union irq_stack_union,
|
||||
irq_stack_union) __aligned(PAGE_SIZE) __visible;
|
||||
EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union);
|
||||
DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
|
||||
fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
|
||||
EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
|
||||
|
||||
/*
|
||||
* The following percpu variables are hot. Align current_task to
|
||||
@ -1523,9 +1510,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
|
||||
&init_task;
|
||||
EXPORT_PER_CPU_SYMBOL(current_task);
|
||||
|
||||
DEFINE_PER_CPU(char *, irq_stack_ptr) =
|
||||
init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE;
|
||||
|
||||
DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
|
||||
DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
|
||||
|
||||
DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
|
||||
@ -1562,23 +1547,7 @@ void syscall_init(void)
|
||||
X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT);
|
||||
}
|
||||
|
||||
/*
|
||||
* Copies of the original ist values from the tss are only accessed during
|
||||
* debugging, no special alignment required.
|
||||
*/
|
||||
DEFINE_PER_CPU(struct orig_ist, orig_ist);
|
||||
|
||||
static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
|
||||
DEFINE_PER_CPU(int, debug_stack_usage);
|
||||
|
||||
int is_debug_stack(unsigned long addr)
|
||||
{
|
||||
return __this_cpu_read(debug_stack_usage) ||
|
||||
(addr <= __this_cpu_read(debug_stack_addr) &&
|
||||
addr > (__this_cpu_read(debug_stack_addr) - DEBUG_STKSZ));
|
||||
}
|
||||
NOKPROBE_SYMBOL(is_debug_stack);
|
||||
|
||||
DEFINE_PER_CPU(u32, debug_idt_ctr);
|
||||
|
||||
void debug_stack_set_zero(void)
|
||||
@ -1690,17 +1659,14 @@ static void setup_getcpu(int cpu)
|
||||
* initialized (naturally) in the bootstrap process, such as the GDT
|
||||
* and IDT. We reload them nevertheless, this function acts as a
|
||||
* 'CPU state barrier', nothing should get across.
|
||||
* A lot of state is already set up in PDA init for 64 bit
|
||||
*/
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
void cpu_init(void)
|
||||
{
|
||||
struct orig_ist *oist;
|
||||
int cpu = raw_smp_processor_id();
|
||||
struct task_struct *me;
|
||||
struct tss_struct *t;
|
||||
unsigned long v;
|
||||
int cpu = raw_smp_processor_id();
|
||||
int i;
|
||||
|
||||
wait_for_master_cpu(cpu);
|
||||
@ -1715,7 +1681,6 @@ void cpu_init(void)
|
||||
load_ucode_ap();
|
||||
|
||||
t = &per_cpu(cpu_tss_rw, cpu);
|
||||
oist = &per_cpu(orig_ist, cpu);
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
if (this_cpu_read(numa_node) == 0 &&
|
||||
@ -1753,16 +1718,11 @@ void cpu_init(void)
|
||||
/*
|
||||
* set up and load the per-CPU TSS
|
||||
*/
|
||||
if (!oist->ist[0]) {
|
||||
char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
|
||||
|
||||
for (v = 0; v < N_EXCEPTION_STACKS; v++) {
|
||||
estacks += exception_stack_sizes[v];
|
||||
oist->ist[v] = t->x86_tss.ist[v] =
|
||||
(unsigned long)estacks;
|
||||
if (v == DEBUG_STACK-1)
|
||||
per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
|
||||
}
|
||||
if (!t->x86_tss.ist[0]) {
|
||||
t->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF);
|
||||
t->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI);
|
||||
t->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB);
|
||||
t->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
|
||||
}
|
||||
|
||||
t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
|
||||
|
@ -34,14 +34,14 @@ const char *stack_type_name(enum stack_type type)
|
||||
|
||||
static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
|
||||
{
|
||||
unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack);
|
||||
unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
|
||||
unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
|
||||
|
||||
/*
|
||||
* This is a software stack, so 'end' can be a valid stack pointer.
|
||||
* It just means the stack is empty.
|
||||
*/
|
||||
if (stack <= begin || stack > end)
|
||||
if (stack < begin || stack > end)
|
||||
return false;
|
||||
|
||||
info->type = STACK_TYPE_IRQ;
|
||||
@ -59,14 +59,14 @@ static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
|
||||
|
||||
static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
|
||||
{
|
||||
unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack);
|
||||
unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack_ptr);
|
||||
unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
|
||||
|
||||
/*
|
||||
* This is a software stack, so 'end' can be a valid stack pointer.
|
||||
* It just means the stack is empty.
|
||||
*/
|
||||
if (stack <= begin || stack > end)
|
||||
if (stack < begin || stack > end)
|
||||
return false;
|
||||
|
||||
info->type = STACK_TYPE_SOFTIRQ;
|
||||
|
@ -16,23 +16,21 @@
|
||||
#include <linux/bug.h>
|
||||
#include <linux/nmi.h>
|
||||
|
||||
#include <asm/cpu_entry_area.h>
|
||||
#include <asm/stacktrace.h>
|
||||
|
||||
static char *exception_stack_names[N_EXCEPTION_STACKS] = {
|
||||
[ DOUBLEFAULT_STACK-1 ] = "#DF",
|
||||
[ NMI_STACK-1 ] = "NMI",
|
||||
[ DEBUG_STACK-1 ] = "#DB",
|
||||
[ MCE_STACK-1 ] = "#MC",
|
||||
};
|
||||
|
||||
static unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = {
|
||||
[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
|
||||
[DEBUG_STACK - 1] = DEBUG_STKSZ
|
||||
static const char * const exception_stack_names[] = {
|
||||
[ ESTACK_DF ] = "#DF",
|
||||
[ ESTACK_NMI ] = "NMI",
|
||||
[ ESTACK_DB2 ] = "#DB2",
|
||||
[ ESTACK_DB1 ] = "#DB1",
|
||||
[ ESTACK_DB ] = "#DB",
|
||||
[ ESTACK_MCE ] = "#MC",
|
||||
};
|
||||
|
||||
const char *stack_type_name(enum stack_type type)
|
||||
{
|
||||
BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
|
||||
BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
|
||||
|
||||
if (type == STACK_TYPE_IRQ)
|
||||
return "IRQ";
|
||||
@ -52,43 +50,84 @@ const char *stack_type_name(enum stack_type type)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* struct estack_pages - Page descriptor for exception stacks
|
||||
* @offs: Offset from the start of the exception stack area
|
||||
* @size: Size of the exception stack
|
||||
* @type: Type to store in the stack_info struct
|
||||
*/
|
||||
struct estack_pages {
|
||||
u32 offs;
|
||||
u16 size;
|
||||
u16 type;
|
||||
};
|
||||
|
||||
#define EPAGERANGE(st) \
|
||||
[PFN_DOWN(CEA_ESTACK_OFFS(st)) ... \
|
||||
PFN_DOWN(CEA_ESTACK_OFFS(st) + CEA_ESTACK_SIZE(st) - 1)] = { \
|
||||
.offs = CEA_ESTACK_OFFS(st), \
|
||||
.size = CEA_ESTACK_SIZE(st), \
|
||||
.type = STACK_TYPE_EXCEPTION + ESTACK_ ##st, }
|
||||
|
||||
/*
|
||||
* Array of exception stack page descriptors. If the stack is larger than
|
||||
* PAGE_SIZE, all pages covering a particular stack will have the same
|
||||
* info. The guard pages, including the unmapped DB2 stack, are zeroed
|
||||
* out.
|
||||
*/
|
||||
static const
|
||||
struct estack_pages estack_pages[CEA_ESTACK_PAGES] ____cacheline_aligned = {
|
||||
EPAGERANGE(DF),
|
||||
EPAGERANGE(NMI),
|
||||
EPAGERANGE(DB1),
|
||||
EPAGERANGE(DB),
|
||||
EPAGERANGE(MCE),
|
||||
};
|
||||
|
||||
static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
|
||||
{
|
||||
unsigned long *begin, *end;
|
||||
unsigned long begin, end, stk = (unsigned long)stack;
|
||||
const struct estack_pages *ep;
|
||||
struct pt_regs *regs;
|
||||
unsigned k;
|
||||
unsigned int k;
|
||||
|
||||
BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
|
||||
BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
|
||||
|
||||
for (k = 0; k < N_EXCEPTION_STACKS; k++) {
|
||||
end = (unsigned long *)raw_cpu_ptr(&orig_ist)->ist[k];
|
||||
begin = end - (exception_stack_sizes[k] / sizeof(long));
|
||||
regs = (struct pt_regs *)end - 1;
|
||||
begin = (unsigned long)__this_cpu_read(cea_exception_stacks);
|
||||
end = begin + sizeof(struct cea_exception_stacks);
|
||||
/* Bail if @stack is outside the exception stack area. */
|
||||
if (stk < begin || stk >= end)
|
||||
return false;
|
||||
|
||||
if (stack <= begin || stack >= end)
|
||||
continue;
|
||||
/* Calc page offset from start of exception stacks */
|
||||
k = (stk - begin) >> PAGE_SHIFT;
|
||||
/* Lookup the page descriptor */
|
||||
ep = &estack_pages[k];
|
||||
/* Guard page? */
|
||||
if (!ep->size)
|
||||
return false;
|
||||
|
||||
info->type = STACK_TYPE_EXCEPTION + k;
|
||||
info->begin = begin;
|
||||
info->end = end;
|
||||
info->next_sp = (unsigned long *)regs->sp;
|
||||
begin += (unsigned long)ep->offs;
|
||||
end = begin + (unsigned long)ep->size;
|
||||
regs = (struct pt_regs *)end - 1;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
info->type = ep->type;
|
||||
info->begin = (unsigned long *)begin;
|
||||
info->end = (unsigned long *)end;
|
||||
info->next_sp = (unsigned long *)regs->sp;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool in_irq_stack(unsigned long *stack, struct stack_info *info)
|
||||
{
|
||||
unsigned long *end = (unsigned long *)this_cpu_read(irq_stack_ptr);
|
||||
unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
|
||||
unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long));
|
||||
|
||||
/*
|
||||
* This is a software stack, so 'end' can be a valid stack pointer.
|
||||
* It just means the stack is empty.
|
||||
*/
|
||||
if (stack <= begin || stack > end)
|
||||
if (stack < begin || stack >= end)
|
||||
return false;
|
||||
|
||||
info->type = STACK_TYPE_IRQ;
|
||||
|
@ -265,7 +265,7 @@ ENDPROC(start_cpu0)
|
||||
GLOBAL(initial_code)
|
||||
.quad x86_64_start_kernel
|
||||
GLOBAL(initial_gs)
|
||||
.quad INIT_PER_CPU_VAR(irq_stack_union)
|
||||
.quad INIT_PER_CPU_VAR(fixed_percpu_data)
|
||||
GLOBAL(initial_stack)
|
||||
/*
|
||||
* The SIZEOF_PTREGS gap is a convention which helps the in-kernel
|
||||
|
@ -41,13 +41,12 @@ struct idt_data {
|
||||
#define SYSG(_vector, _addr) \
|
||||
G(_vector, _addr, DEFAULT_STACK, GATE_INTERRUPT, DPL3, __KERNEL_CS)
|
||||
|
||||
/* Interrupt gate with interrupt stack */
|
||||
/*
|
||||
* Interrupt gate with interrupt stack. The _ist index is the index in
|
||||
* the tss.ist[] array, but for the descriptor it needs to start at 1.
|
||||
*/
|
||||
#define ISTG(_vector, _addr, _ist) \
|
||||
G(_vector, _addr, _ist, GATE_INTERRUPT, DPL0, __KERNEL_CS)
|
||||
|
||||
/* System interrupt gate with interrupt stack */
|
||||
#define SISTG(_vector, _addr, _ist) \
|
||||
G(_vector, _addr, _ist, GATE_INTERRUPT, DPL3, __KERNEL_CS)
|
||||
G(_vector, _addr, _ist + 1, GATE_INTERRUPT, DPL0, __KERNEL_CS)
|
||||
|
||||
/* Task gate */
|
||||
#define TSKG(_vector, _gdt) \
|
||||
@ -184,11 +183,11 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
|
||||
* cpu_init() when the TSS has been initialized.
|
||||
*/
|
||||
static const __initconst struct idt_data ist_idts[] = {
|
||||
ISTG(X86_TRAP_DB, debug, DEBUG_STACK),
|
||||
ISTG(X86_TRAP_NMI, nmi, NMI_STACK),
|
||||
ISTG(X86_TRAP_DF, double_fault, DOUBLEFAULT_STACK),
|
||||
ISTG(X86_TRAP_DB, debug, IST_INDEX_DB),
|
||||
ISTG(X86_TRAP_NMI, nmi, IST_INDEX_NMI),
|
||||
ISTG(X86_TRAP_DF, double_fault, IST_INDEX_DF),
|
||||
#ifdef CONFIG_X86_MCE
|
||||
ISTG(X86_TRAP_MC, &machine_check, MCE_STACK),
|
||||
ISTG(X86_TRAP_MC, &machine_check, IST_INDEX_MCE),
|
||||
#endif
|
||||
};
|
||||
|
||||
|
@ -51,8 +51,8 @@ static inline int check_stack_overflow(void) { return 0; }
|
||||
static inline void print_stack_overflow(void) { }
|
||||
#endif
|
||||
|
||||
DEFINE_PER_CPU(struct irq_stack *, hardirq_stack);
|
||||
DEFINE_PER_CPU(struct irq_stack *, softirq_stack);
|
||||
DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
|
||||
DEFINE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
|
||||
|
||||
static void call_on_stack(void *func, void *stack)
|
||||
{
|
||||
@ -76,7 +76,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
|
||||
u32 *isp, *prev_esp, arg1;
|
||||
|
||||
curstk = (struct irq_stack *) current_stack();
|
||||
irqstk = __this_cpu_read(hardirq_stack);
|
||||
irqstk = __this_cpu_read(hardirq_stack_ptr);
|
||||
|
||||
/*
|
||||
* this is where we switch to the IRQ stack. However, if we are
|
||||
@ -107,27 +107,28 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
|
||||
}
|
||||
|
||||
/*
|
||||
* allocate per-cpu stacks for hardirq and for softirq processing
|
||||
* Allocate per-cpu stacks for hardirq and softirq processing
|
||||
*/
|
||||
void irq_ctx_init(int cpu)
|
||||
int irq_init_percpu_irqstack(unsigned int cpu)
|
||||
{
|
||||
struct irq_stack *irqstk;
|
||||
int node = cpu_to_node(cpu);
|
||||
struct page *ph, *ps;
|
||||
|
||||
if (per_cpu(hardirq_stack, cpu))
|
||||
return;
|
||||
if (per_cpu(hardirq_stack_ptr, cpu))
|
||||
return 0;
|
||||
|
||||
irqstk = page_address(alloc_pages_node(cpu_to_node(cpu),
|
||||
THREADINFO_GFP,
|
||||
THREAD_SIZE_ORDER));
|
||||
per_cpu(hardirq_stack, cpu) = irqstk;
|
||||
ph = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER);
|
||||
if (!ph)
|
||||
return -ENOMEM;
|
||||
ps = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER);
|
||||
if (!ps) {
|
||||
__free_pages(ph, THREAD_SIZE_ORDER);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
irqstk = page_address(alloc_pages_node(cpu_to_node(cpu),
|
||||
THREADINFO_GFP,
|
||||
THREAD_SIZE_ORDER));
|
||||
per_cpu(softirq_stack, cpu) = irqstk;
|
||||
|
||||
printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n",
|
||||
cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu));
|
||||
per_cpu(hardirq_stack_ptr, cpu) = page_address(ph);
|
||||
per_cpu(softirq_stack_ptr, cpu) = page_address(ps);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void do_softirq_own_stack(void)
|
||||
@ -135,7 +136,7 @@ void do_softirq_own_stack(void)
|
||||
struct irq_stack *irqstk;
|
||||
u32 *isp, *prev_esp;
|
||||
|
||||
irqstk = __this_cpu_read(softirq_stack);
|
||||
irqstk = __this_cpu_read(softirq_stack_ptr);
|
||||
|
||||
/* build the stack frame on the softirq stack */
|
||||
isp = (u32 *) ((char *)irqstk + sizeof(*irqstk));
|
||||
|
@ -18,63 +18,64 @@
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/sched/task_stack.h>
|
||||
|
||||
#include <asm/cpu_entry_area.h>
|
||||
#include <asm/io_apic.h>
|
||||
#include <asm/apic.h>
|
||||
|
||||
int sysctl_panic_on_stackoverflow;
|
||||
|
||||
/*
|
||||
* Probabilistic stack overflow check:
|
||||
*
|
||||
* Only check the stack in process context, because everything else
|
||||
* runs on the big interrupt stacks. Checking reliably is too expensive,
|
||||
* so we just check from interrupts.
|
||||
*/
|
||||
static inline void stack_overflow_check(struct pt_regs *regs)
|
||||
{
|
||||
#ifdef CONFIG_DEBUG_STACKOVERFLOW
|
||||
#define STACK_TOP_MARGIN 128
|
||||
struct orig_ist *oist;
|
||||
u64 irq_stack_top, irq_stack_bottom;
|
||||
u64 estack_top, estack_bottom;
|
||||
u64 curbase = (u64)task_stack_page(current);
|
||||
|
||||
if (user_mode(regs))
|
||||
return;
|
||||
|
||||
if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
|
||||
regs->sp <= curbase + THREAD_SIZE)
|
||||
return;
|
||||
|
||||
irq_stack_top = (u64)this_cpu_ptr(irq_stack_union.irq_stack) +
|
||||
STACK_TOP_MARGIN;
|
||||
irq_stack_bottom = (u64)__this_cpu_read(irq_stack_ptr);
|
||||
if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom)
|
||||
return;
|
||||
|
||||
oist = this_cpu_ptr(&orig_ist);
|
||||
estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN;
|
||||
estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1];
|
||||
if (regs->sp >= estack_top && regs->sp <= estack_bottom)
|
||||
return;
|
||||
|
||||
WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n",
|
||||
current->comm, curbase, regs->sp,
|
||||
irq_stack_top, irq_stack_bottom,
|
||||
estack_top, estack_bottom, (void *)regs->ip);
|
||||
|
||||
if (sysctl_panic_on_stackoverflow)
|
||||
panic("low stack detected by irq handler - check messages\n");
|
||||
#endif
|
||||
}
|
||||
DEFINE_PER_CPU_PAGE_ALIGNED(struct irq_stack, irq_stack_backing_store) __visible;
|
||||
DECLARE_INIT_PER_CPU(irq_stack_backing_store);
|
||||
|
||||
bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
|
||||
{
|
||||
stack_overflow_check(regs);
|
||||
|
||||
if (IS_ERR_OR_NULL(desc))
|
||||
return false;
|
||||
|
||||
generic_handle_irq_desc(desc);
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_VMAP_STACK
|
||||
/*
|
||||
* VMAP the backing store with guard pages
|
||||
*/
|
||||
static int map_irq_stack(unsigned int cpu)
|
||||
{
|
||||
char *stack = (char *)per_cpu_ptr(&irq_stack_backing_store, cpu);
|
||||
struct page *pages[IRQ_STACK_SIZE / PAGE_SIZE];
|
||||
void *va;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < IRQ_STACK_SIZE / PAGE_SIZE; i++) {
|
||||
phys_addr_t pa = per_cpu_ptr_to_phys(stack + (i << PAGE_SHIFT));
|
||||
|
||||
pages[i] = pfn_to_page(pa >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
va = vmap(pages, IRQ_STACK_SIZE / PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
|
||||
if (!va)
|
||||
return -ENOMEM;
|
||||
|
||||
per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE;
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
/*
|
||||
* If VMAP stacks are disabled due to KASAN, just use the per cpu
|
||||
* backing store without guard pages.
|
||||
*/
|
||||
static int map_irq_stack(unsigned int cpu)
|
||||
{
|
||||
void *va = per_cpu_ptr(&irq_stack_backing_store, cpu);
|
||||
|
||||
per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
int irq_init_percpu_irqstack(unsigned int cpu)
|
||||
{
|
||||
if (per_cpu(hardirq_stack_ptr, cpu))
|
||||
return 0;
|
||||
return map_irq_stack(cpu);
|
||||
}
|
||||
|
@ -91,6 +91,8 @@ void __init init_IRQ(void)
|
||||
for (i = 0; i < nr_legacy_irqs(); i++)
|
||||
per_cpu(vector_irq, 0)[ISA_IRQ_VECTOR(i)] = irq_to_desc(i);
|
||||
|
||||
BUG_ON(irq_init_percpu_irqstack(smp_processor_id()));
|
||||
|
||||
x86_init.irqs.intr_init();
|
||||
}
|
||||
|
||||
@ -104,6 +106,4 @@ void __init native_init_IRQ(void)
|
||||
|
||||
if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs())
|
||||
setup_irq(2, &irq2);
|
||||
|
||||
irq_ctx_init(smp_processor_id());
|
||||
}
|
||||
|
@ -21,13 +21,14 @@
|
||||
#include <linux/ratelimit.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/sched/clock.h>
|
||||
|
||||
#if defined(CONFIG_EDAC)
|
||||
#include <linux/edac.h>
|
||||
#endif
|
||||
|
||||
#include <linux/atomic.h>
|
||||
#include <asm/cpu_entry_area.h>
|
||||
#include <asm/traps.h>
|
||||
#include <asm/mach_traps.h>
|
||||
#include <asm/nmi.h>
|
||||
@ -487,6 +488,23 @@ static DEFINE_PER_CPU(unsigned long, nmi_cr2);
|
||||
* switch back to the original IDT.
|
||||
*/
|
||||
static DEFINE_PER_CPU(int, update_debug_stack);
|
||||
|
||||
/*
 * Tell whether @addr belongs to the debug stack of the current CPU.
 *
 * Returns true when the per cpu debug_stack_usage counter is non-zero or
 * when @addr lies in the range from the bottom of the DB1 stack up to
 * (but not including) the top of the DB stack.
 */
static bool notrace is_debug_stack(unsigned long addr)
{
	struct cea_exception_stacks *cs;

	if (__this_cpu_read(debug_stack_usage))
		return true;

	cs = __this_cpu_read(cea_exception_stacks);

	/*
	 * A single range check from the bottom of DB1 to the top of DB also
	 * covers the guard page between the two stacks, which saves a
	 * second comparison. @addr can never point into that guard page.
	 */
	return addr >= CEA_ESTACK_BOT(cs, DB1) && addr < CEA_ESTACK_TOP(cs, DB);
}
|
||||
NOKPROBE_SYMBOL(is_debug_stack);
|
||||
#endif
|
||||
|
||||
dotraplinkage notrace void
|
||||
|
@ -244,11 +244,6 @@ void __init setup_per_cpu_areas(void)
|
||||
per_cpu(x86_cpu_to_logical_apicid, cpu) =
|
||||
early_per_cpu_map(x86_cpu_to_logical_apicid, cpu);
|
||||
#endif
|
||||
#ifdef CONFIG_X86_64
|
||||
per_cpu(irq_stack_ptr, cpu) =
|
||||
per_cpu(irq_stack_union.irq_stack, cpu) +
|
||||
IRQ_STACK_SIZE;
|
||||
#endif
|
||||
#ifdef CONFIG_NUMA
|
||||
per_cpu(x86_cpu_to_node_map, cpu) =
|
||||
early_per_cpu_map(x86_cpu_to_node_map, cpu);
|
||||
|
@ -935,20 +935,27 @@ out:
|
||||
return boot_error;
|
||||
}
|
||||
|
||||
void common_cpu_up(unsigned int cpu, struct task_struct *idle)
|
||||
int common_cpu_up(unsigned int cpu, struct task_struct *idle)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* Just in case we booted with a single CPU. */
|
||||
alternatives_enable_smp();
|
||||
|
||||
per_cpu(current_task, cpu) = idle;
|
||||
|
||||
/* Initialize the interrupt stack(s) */
|
||||
ret = irq_init_percpu_irqstack(cpu);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/* Stack for startup_32 can be just as for start_secondary onwards */
|
||||
irq_ctx_init(cpu);
|
||||
per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
|
||||
#else
|
||||
initial_gs = per_cpu_offset(cpu);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1106,7 +1113,9 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
|
||||
/* the FPU context is blank, nobody can own it */
|
||||
per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;
|
||||
|
||||
common_cpu_up(cpu, tidle);
|
||||
err = common_cpu_up(cpu, tidle);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered);
|
||||
if (err) {
|
||||
|
@ -403,7 +403,8 @@ SECTIONS
|
||||
*/
|
||||
#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load
|
||||
INIT_PER_CPU(gdt_page);
|
||||
INIT_PER_CPU(irq_stack_union);
|
||||
INIT_PER_CPU(fixed_percpu_data);
|
||||
INIT_PER_CPU(irq_stack_backing_store);
|
||||
|
||||
/*
|
||||
* Build-time check on the image size:
|
||||
@ -412,8 +413,8 @@ INIT_PER_CPU(irq_stack_union);
|
||||
"kernel image bigger than KERNEL_IMAGE_SIZE");
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
. = ASSERT((irq_stack_union == 0),
|
||||
"irq_stack_union is not at start of per-cpu area");
|
||||
. = ASSERT((fixed_percpu_data == 0),
|
||||
"fixed_percpu_data is not at start of per-cpu area");
|
||||
#endif
|
||||
|
||||
#endif /* CONFIG_X86_32 */
|
||||
|
@ -13,8 +13,8 @@
|
||||
static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
|
||||
[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
|
||||
static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
|
||||
DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
|
||||
#endif
|
||||
|
||||
struct cpu_entry_area *get_cpu_entry_area(int cpu)
|
||||
@ -52,10 +52,10 @@ cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
|
||||
cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
|
||||
}
|
||||
|
||||
static void __init percpu_setup_debug_store(int cpu)
|
||||
static void __init percpu_setup_debug_store(unsigned int cpu)
|
||||
{
|
||||
#ifdef CONFIG_CPU_SUP_INTEL
|
||||
int npages;
|
||||
unsigned int npages;
|
||||
void *cea;
|
||||
|
||||
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
|
||||
@ -78,9 +78,43 @@ static void __init percpu_setup_debug_store(int cpu)
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Setup the fixmap mappings only once per-processor */
|
||||
static void __init setup_cpu_entry_area(int cpu)
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
#define cea_map_stack(name) do { \
|
||||
npages = sizeof(estacks->name## _stack) / PAGE_SIZE; \
|
||||
cea_map_percpu_pages(cea->estacks.name## _stack, \
|
||||
estacks->name## _stack, npages, PAGE_KERNEL); \
|
||||
} while (0)
|
||||
|
||||
static void __init percpu_setup_exception_stacks(unsigned int cpu)
|
||||
{
|
||||
struct exception_stacks *estacks = per_cpu_ptr(&exception_stacks, cpu);
|
||||
struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
|
||||
unsigned int npages;
|
||||
|
||||
BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
|
||||
|
||||
per_cpu(cea_exception_stacks, cpu) = &cea->estacks;
|
||||
|
||||
/*
|
||||
* The exceptions stack mappings in the per cpu area are protected
|
||||
* by guard pages so each stack must be mapped separately. DB2 is
|
||||
* not mapped; it just exists to catch triple nesting of #DB.
|
||||
*/
|
||||
cea_map_stack(DF);
|
||||
cea_map_stack(NMI);
|
||||
cea_map_stack(DB1);
|
||||
cea_map_stack(DB);
|
||||
cea_map_stack(MCE);
|
||||
}
|
||||
#else
|
||||
static inline void percpu_setup_exception_stacks(unsigned int cpu) {}
|
||||
#endif
|
||||
|
||||
/* Setup the fixmap mappings only once per-processor */
|
||||
static void __init setup_cpu_entry_area(unsigned int cpu)
|
||||
{
|
||||
struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
|
||||
#ifdef CONFIG_X86_64
|
||||
/* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
|
||||
pgprot_t gdt_prot = PAGE_KERNEL_RO;
|
||||
@ -101,10 +135,9 @@ static void __init setup_cpu_entry_area(int cpu)
|
||||
pgprot_t tss_prot = PAGE_KERNEL;
|
||||
#endif
|
||||
|
||||
cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu),
|
||||
gdt_prot);
|
||||
cea_set_pte(&cea->gdt, get_cpu_gdt_paddr(cpu), gdt_prot);
|
||||
|
||||
cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page,
|
||||
cea_map_percpu_pages(&cea->entry_stack_page,
|
||||
per_cpu_ptr(&entry_stack_storage, cpu), 1,
|
||||
PAGE_KERNEL);
|
||||
|
||||
@ -128,22 +161,15 @@ static void __init setup_cpu_entry_area(int cpu)
|
||||
BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
|
||||
offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
|
||||
BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
|
||||
cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss,
|
||||
&per_cpu(cpu_tss_rw, cpu),
|
||||
cea_map_percpu_pages(&cea->tss, &per_cpu(cpu_tss_rw, cpu),
|
||||
sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
|
||||
per_cpu(cpu_entry_area, cpu) = cea;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
|
||||
BUILD_BUG_ON(sizeof(exception_stacks) !=
|
||||
sizeof(((struct cpu_entry_area *)0)->exception_stacks));
|
||||
cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
|
||||
&per_cpu(exception_stacks, cpu),
|
||||
sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
|
||||
#endif
|
||||
percpu_setup_exception_stacks(cpu);
|
||||
|
||||
percpu_setup_debug_store(cpu);
|
||||
}
|
||||
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include <asm/mmu_context.h> /* vma_pkey() */
|
||||
#include <asm/efi.h> /* efi_recover_from_page_fault()*/
|
||||
#include <asm/desc.h> /* store_idt(), ... */
|
||||
#include <asm/cpu_entry_area.h> /* exception stack */
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <asm/trace/exceptions.h>
|
||||
@ -793,7 +794,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
|
||||
if (is_vmalloc_addr((void *)address) &&
|
||||
(((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
|
||||
address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
|
||||
unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
|
||||
unsigned long stack = __this_cpu_ist_top_va(DF) - sizeof(void *);
|
||||
/*
|
||||
* We're likely to be running with very little stack space
|
||||
* left. It's plausible that we'd hit this condition but
|
||||
|
@ -754,7 +754,7 @@ static void percpu_init(void)
|
||||
* __per_cpu_load
|
||||
*
|
||||
* The "gold" linker incorrectly associates:
|
||||
* init_per_cpu__irq_stack_union
|
||||
* init_per_cpu__fixed_percpu_data
|
||||
* init_per_cpu__gdt_page
|
||||
*/
|
||||
static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)
|
||||
|
@ -361,7 +361,9 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle)
|
||||
{
|
||||
int rc;
|
||||
|
||||
common_cpu_up(cpu, idle);
|
||||
rc = common_cpu_up(cpu, idle);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
xen_setup_runstate_info(cpu);
|
||||
|
||||
|
@ -40,13 +40,13 @@ ENTRY(startup_xen)
|
||||
#ifdef CONFIG_X86_64
|
||||
/* Set up %gs.
|
||||
*
|
||||
* The base of %gs always points to the bottom of the irqstack
|
||||
* union. If the stack protector canary is enabled, it is
|
||||
* located at %gs:40. Note that, on SMP, the boot cpu uses
|
||||
* init data section till per cpu areas are set up.
|
||||
* The base of %gs always points to fixed_percpu_data. If the
|
||||
* stack protector canary is enabled, it is located at %gs:40.
|
||||
* Note that, on SMP, the boot cpu uses init data section until
|
||||
* the per cpu areas are set up.
|
||||
*/
|
||||
movl $MSR_GS_BASE,%ecx
|
||||
movq $INIT_PER_CPU_VAR(irq_stack_union),%rax
|
||||
movq $INIT_PER_CPU_VAR(fixed_percpu_data),%rax
|
||||
cdq
|
||||
wrmsr
|
||||
#endif
|
||||
|
@ -1687,7 +1687,6 @@ void __init xen_init_IRQ(void)
|
||||
|
||||
#ifdef CONFIG_X86
|
||||
if (xen_pv_domain()) {
|
||||
irq_ctx_init(smp_processor_id());
|
||||
if (xen_initial_domain())
|
||||
pci_xen_initial_domain();
|
||||
}
|
||||
|
48
mm/slab.c
48
mm/slab.c
@ -1467,53 +1467,17 @@ static bool is_debug_pagealloc_cache(struct kmem_cache *cachep)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_PAGEALLOC
|
||||
static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
|
||||
unsigned long caller)
|
||||
{
|
||||
int size = cachep->object_size;
|
||||
|
||||
addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)];
|
||||
|
||||
if (size < 5 * sizeof(unsigned long))
|
||||
return;
|
||||
|
||||
*addr++ = 0x12345678;
|
||||
*addr++ = caller;
|
||||
*addr++ = smp_processor_id();
|
||||
size -= 3 * sizeof(unsigned long);
|
||||
{
|
||||
unsigned long *sptr = &caller;
|
||||
unsigned long svalue;
|
||||
|
||||
while (!kstack_end(sptr)) {
|
||||
svalue = *sptr++;
|
||||
if (kernel_text_address(svalue)) {
|
||||
*addr++ = svalue;
|
||||
size -= sizeof(unsigned long);
|
||||
if (size <= sizeof(unsigned long))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
*addr++ = 0x87654321;
|
||||
}
|
||||
|
||||
static void slab_kernel_map(struct kmem_cache *cachep, void *objp,
|
||||
int map, unsigned long caller)
|
||||
static void slab_kernel_map(struct kmem_cache *cachep, void *objp, int map)
|
||||
{
|
||||
if (!is_debug_pagealloc_cache(cachep))
|
||||
return;
|
||||
|
||||
if (caller)
|
||||
store_stackinfo(cachep, objp, caller);
|
||||
|
||||
kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map);
|
||||
}
|
||||
|
||||
#else
|
||||
static inline void slab_kernel_map(struct kmem_cache *cachep, void *objp,
|
||||
int map, unsigned long caller) {}
|
||||
int map) {}
|
||||
|
||||
#endif
|
||||
|
||||
@ -1661,7 +1625,7 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep,
|
||||
|
||||
if (cachep->flags & SLAB_POISON) {
|
||||
check_poison_obj(cachep, objp);
|
||||
slab_kernel_map(cachep, objp, 1, 0);
|
||||
slab_kernel_map(cachep, objp, 1);
|
||||
}
|
||||
if (cachep->flags & SLAB_RED_ZONE) {
|
||||
if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
|
||||
@ -2433,7 +2397,7 @@ static void cache_init_objs_debug(struct kmem_cache *cachep, struct page *page)
|
||||
/* need to poison the objs? */
|
||||
if (cachep->flags & SLAB_POISON) {
|
||||
poison_obj(cachep, objp, POISON_FREE);
|
||||
slab_kernel_map(cachep, objp, 0, 0);
|
||||
slab_kernel_map(cachep, objp, 0);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -2812,7 +2776,7 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
|
||||
|
||||
if (cachep->flags & SLAB_POISON) {
|
||||
poison_obj(cachep, objp, POISON_FREE);
|
||||
slab_kernel_map(cachep, objp, 0, caller);
|
||||
slab_kernel_map(cachep, objp, 0);
|
||||
}
|
||||
return objp;
|
||||
}
|
||||
@ -3076,7 +3040,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
|
||||
return objp;
|
||||
if (cachep->flags & SLAB_POISON) {
|
||||
check_poison_obj(cachep, objp);
|
||||
slab_kernel_map(cachep, objp, 1, 0);
|
||||
slab_kernel_map(cachep, objp, 1);
|
||||
poison_obj(cachep, objp, POISON_INUSE);
|
||||
}
|
||||
if (cachep->flags & SLAB_STORE_USER)
|
||||
|
Loading…
Reference in New Issue
Block a user