Merge branch 'x86-threadinfo-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 threadinfo changes from Ingo Molnar:
 "The main change here is the consolidation/unification of 32 and 64 bit
  thread_info handling methods, from Steve Rostedt"

* 'x86-threadinfo-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86, threadinfo: Redo "x86: Use inline assembler to get sp"
  x86: Clean up dumpstack_64.c code
  x86: Keep thread_info on thread stack in x86_32
  x86: Prepare removal of previous_esp from i386 thread_info structure
  x86: Nuke GET_THREAD_INFO_WITH_ESP() macro for i386
  x86: Nuke the supervisor_stack field in i386 thread_info
This commit is contained in:
Linus Torvalds 2014-04-01 10:17:18 -07:00
commit 99f7b025bf
9 changed files with 190 additions and 136 deletions

View File

@ -449,6 +449,15 @@ struct stack_canary {
}; };
DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
#endif #endif
/*
 * per-CPU IRQ handling stacks
 *
 * Each stack is THREAD_SIZE bytes, aligned to THREAD_SIZE, and laid out as a
 * plain array of u32 words (no thread_info is embedded in it).
 * hardirq_stack and softirq_stack are per-CPU pointers to the stacks that
 * the hard-IRQ and soft-IRQ paths switch onto.
 */
struct irq_stack {
u32 stack[THREAD_SIZE/sizeof(u32)];
} __aligned(THREAD_SIZE);
DECLARE_PER_CPU(struct irq_stack *, hardirq_stack);
DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
#endif /* X86_64 */ #endif /* X86_64 */
extern unsigned int xstate_size; extern unsigned int xstate_size;

View File

@ -9,6 +9,7 @@
#include <linux/compiler.h> #include <linux/compiler.h>
#include <asm/page.h> #include <asm/page.h>
#include <asm/percpu.h>
#include <asm/types.h> #include <asm/types.h>
/* /*
@ -32,12 +33,6 @@ struct thread_info {
mm_segment_t addr_limit; mm_segment_t addr_limit;
struct restart_block restart_block; struct restart_block restart_block;
void __user *sysenter_return; void __user *sysenter_return;
#ifdef CONFIG_X86_32
unsigned long previous_esp; /* ESP of the previous stack in
case of nested (IRQ) stacks
*/
__u8 supervisor_stack[0];
#endif
unsigned int sig_on_uaccess_error:1; unsigned int sig_on_uaccess_error:1;
unsigned int uaccess_err:1; /* uaccess failed */ unsigned int uaccess_err:1; /* uaccess failed */
}; };
@ -153,9 +148,9 @@ struct thread_info {
#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
#ifdef CONFIG_X86_32 #define STACK_WARN (THREAD_SIZE/8)
#define KERNEL_STACK_OFFSET (5*(BITS_PER_LONG/8))
#define STACK_WARN (THREAD_SIZE/8)
/* /*
* macros/functions for gaining access to the thread information structure * macros/functions for gaining access to the thread information structure
* *
@ -163,42 +158,6 @@ struct thread_info {
*/ */
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
#define current_stack_pointer ({ \
unsigned long sp; \
asm("mov %%esp,%0" : "=g" (sp)); \
sp; \
})
/* how to get the thread information struct from C */
static inline struct thread_info *current_thread_info(void)
{
return (struct thread_info *)
(current_stack_pointer & ~(THREAD_SIZE - 1));
}
#else /* !__ASSEMBLY__ */
/* how to get the thread information struct from ASM */
#define GET_THREAD_INFO(reg) \
movl $-THREAD_SIZE, reg; \
andl %esp, reg
/* use this one if reg already contains %esp */
#define GET_THREAD_INFO_WITH_ESP(reg) \
andl $-THREAD_SIZE, reg
#endif
#else /* X86_32 */
#include <asm/percpu.h>
#define KERNEL_STACK_OFFSET (5*8)
/*
* macros/functions for gaining access to the thread information structure
* preempt_count needs to be 1 initially, until the scheduler is functional.
*/
#ifndef __ASSEMBLY__
DECLARE_PER_CPU(unsigned long, kernel_stack); DECLARE_PER_CPU(unsigned long, kernel_stack);
static inline struct thread_info *current_thread_info(void) static inline struct thread_info *current_thread_info(void)
@ -213,8 +172,8 @@ static inline struct thread_info *current_thread_info(void)
/* how to get the thread information struct from ASM */ /* how to get the thread information struct from ASM */
#define GET_THREAD_INFO(reg) \ #define GET_THREAD_INFO(reg) \
movq PER_CPU_VAR(kernel_stack),reg ; \ _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \
subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg _ASM_SUB $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg ;
/* /*
* Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in
@ -224,8 +183,6 @@ static inline struct thread_info *current_thread_info(void)
#endif #endif
#endif /* !X86_32 */
/* /*
* Thread-synchronous status. * Thread-synchronous status.
* *

View File

@ -1079,6 +1079,10 @@ static __init int setup_disablecpuid(char *arg)
} }
__setup("clearcpuid=", setup_disablecpuid); __setup("clearcpuid=", setup_disablecpuid);
DEFINE_PER_CPU(unsigned long, kernel_stack) =
(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
EXPORT_PER_CPU_SYMBOL(kernel_stack);
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1, struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1,
@ -1095,10 +1099,6 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
&init_task; &init_task;
EXPORT_PER_CPU_SYMBOL(current_task); EXPORT_PER_CPU_SYMBOL(current_task);
DEFINE_PER_CPU(unsigned long, kernel_stack) =
(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
EXPORT_PER_CPU_SYMBOL(kernel_stack);
DEFINE_PER_CPU(char *, irq_stack_ptr) = DEFINE_PER_CPU(char *, irq_stack_ptr) =
init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;

View File

@ -16,12 +16,35 @@
#include <asm/stacktrace.h> #include <asm/stacktrace.h>
/*
 * Test whether @p points into the THREAD_SIZE-byte stack that starts at @irq.
 *
 * Returns the address one past the end of that stack (irq + THREAD_SIZE)
 * when @p lies in [irq, irq + THREAD_SIZE), or NULL otherwise.
 */
static void *is_irq_stack(void *p, void *irq)
{
	void *irq_end = irq + THREAD_SIZE;

	if (p >= irq && p < irq_end)
		return irq_end;

	return NULL;
}
/*
 * Check @stack against the hard-IRQ stack of @cpu.
 *
 * Returns the end address of that stack if @stack lies inside it,
 * NULL otherwise (see is_irq_stack()).
 */
static void *is_hardirq_stack(unsigned long *stack, int cpu)
{
	return is_irq_stack(stack, per_cpu(hardirq_stack, cpu));
}
/*
 * Check @stack against the soft-IRQ stack of @cpu.
 *
 * Returns the end address of that stack if @stack lies inside it,
 * NULL otherwise (see is_irq_stack()).
 */
static void *is_softirq_stack(unsigned long *stack, int cpu)
{
	return is_irq_stack(stack, per_cpu(softirq_stack, cpu));
}
void dump_trace(struct task_struct *task, struct pt_regs *regs, void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp, unsigned long *stack, unsigned long bp,
const struct stacktrace_ops *ops, void *data) const struct stacktrace_ops *ops, void *data)
{ {
const unsigned cpu = get_cpu();
int graph = 0; int graph = 0;
u32 *prev_esp;
if (!task) if (!task)
task = current; task = current;
@ -39,18 +62,31 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
for (;;) { for (;;) {
struct thread_info *context; struct thread_info *context;
void *end_stack;
context = (struct thread_info *) end_stack = is_hardirq_stack(stack, cpu);
((unsigned long)stack & (~(THREAD_SIZE - 1))); if (!end_stack)
bp = ops->walk_stack(context, stack, bp, ops, data, NULL, &graph); end_stack = is_softirq_stack(stack, cpu);
stack = (unsigned long *)context->previous_esp; context = task_thread_info(task);
bp = ops->walk_stack(context, stack, bp, ops, data,
end_stack, &graph);
/* Stop if not on irq stack */
if (!end_stack)
break;
/* The previous esp is saved on the bottom of the stack */
prev_esp = (u32 *)(end_stack - THREAD_SIZE);
stack = (unsigned long *)*prev_esp;
if (!stack) if (!stack)
break; break;
if (ops->stack(data, "IRQ") < 0) if (ops->stack(data, "IRQ") < 0)
break; break;
touch_nmi_watchdog(); touch_nmi_watchdog();
} }
put_cpu();
} }
EXPORT_SYMBOL(dump_trace); EXPORT_SYMBOL(dump_trace);

View File

@ -104,6 +104,45 @@ in_irq_stack(unsigned long *stack, unsigned long *irq_stack,
return (stack >= irq_stack && stack < irq_stack_end); return (stack >= irq_stack && stack < irq_stack_end);
} }
/*
 * Size of the IRQ stack, counted in unsigned long slots, measured up to the
 * address held in irq_stack_ptr.  The "- 64" mirrors the initial value of
 * irq_stack_ptr (irq_stack + IRQ_STACK_SIZE - 64), so subtracting this many
 * slots from irq_stack_ptr yields the start of the IRQ stack.
 */
static const unsigned long irq_stack_size =
(IRQ_STACK_SIZE - 64) / sizeof(unsigned long);
/*
 * Classification of the stack an address belongs to, as computed by
 * analyze_stack().
 */
enum stack_type {
/* not the task stack, not an exception stack, not the per-CPU IRQ stack */
STACK_IS_UNKNOWN,
/* address lies in the THREAD_SIZE region returned by task_stack_page() */
STACK_IS_NORMAL,
/* in_exception_stack() recognised the address */
STACK_IS_EXCEPTION,
/* address lies inside the per-CPU IRQ stack */
STACK_IS_IRQ,
};
static enum stack_type
analyze_stack(int cpu, struct task_struct *task,
unsigned long *stack, unsigned long **stack_end, char **id)
{
unsigned long *irq_stack;
unsigned long addr;
unsigned used = 0;
addr = ((unsigned long)stack & (~(THREAD_SIZE - 1)));
if ((unsigned long)task_stack_page(task) == addr)
return STACK_IS_NORMAL;
*stack_end = in_exception_stack(cpu, (unsigned long)stack,
&used, id);
if (*stack_end)
return STACK_IS_EXCEPTION;
*stack_end = (unsigned long *)per_cpu(irq_stack_ptr, cpu);
if (!*stack_end)
return STACK_IS_UNKNOWN;
irq_stack = *stack_end - irq_stack_size;
if (in_irq_stack(stack, irq_stack, *stack_end))
return STACK_IS_IRQ;
return STACK_IS_UNKNOWN;
}
/* /*
* x86-64 can have up to three kernel stacks: * x86-64 can have up to three kernel stacks:
* process stack * process stack
@ -116,12 +155,11 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
const struct stacktrace_ops *ops, void *data) const struct stacktrace_ops *ops, void *data)
{ {
const unsigned cpu = get_cpu(); const unsigned cpu = get_cpu();
unsigned long *irq_stack_end =
(unsigned long *)per_cpu(irq_stack_ptr, cpu);
unsigned used = 0;
struct thread_info *tinfo; struct thread_info *tinfo;
int graph = 0; unsigned long *irq_stack;
unsigned long dummy; unsigned long dummy;
int graph = 0;
int done = 0;
if (!task) if (!task)
task = current; task = current;
@ -143,49 +181,60 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
* exceptions * exceptions
*/ */
tinfo = task_thread_info(task); tinfo = task_thread_info(task);
for (;;) { while (!done) {
unsigned long *stack_end;
enum stack_type stype;
char *id; char *id;
unsigned long *estack_end;
estack_end = in_exception_stack(cpu, (unsigned long)stack,
&used, &id);
if (estack_end) { stype = analyze_stack(cpu, task, stack, &stack_end, &id);
/* Default finish unless specified to continue */
done = 1;
switch (stype) {
/* Break out early if we are on the thread stack */
case STACK_IS_NORMAL:
break;
case STACK_IS_EXCEPTION:
if (ops->stack(data, id) < 0) if (ops->stack(data, id) < 0)
break; break;
bp = ops->walk_stack(tinfo, stack, bp, ops, bp = ops->walk_stack(tinfo, stack, bp, ops,
data, estack_end, &graph); data, stack_end, &graph);
ops->stack(data, "<EOE>"); ops->stack(data, "<EOE>");
/* /*
* We link to the next stack via the * We link to the next stack via the
* second-to-last pointer (index -2 to end) in the * second-to-last pointer (index -2 to end) in the
* exception stack: * exception stack:
*/ */
stack = (unsigned long *) estack_end[-2]; stack = (unsigned long *) stack_end[-2];
continue; done = 0;
} break;
if (irq_stack_end) {
unsigned long *irq_stack;
irq_stack = irq_stack_end -
(IRQ_STACK_SIZE - 64) / sizeof(*irq_stack);
if (in_irq_stack(stack, irq_stack, irq_stack_end)) { case STACK_IS_IRQ:
if (ops->stack(data, "IRQ") < 0)
break; if (ops->stack(data, "IRQ") < 0)
bp = ops->walk_stack(tinfo, stack, bp, break;
ops, data, irq_stack_end, &graph); bp = ops->walk_stack(tinfo, stack, bp,
/* ops, data, stack_end, &graph);
* We link to the next stack (which would be /*
* the process stack normally) the last * We link to the next stack (which would be
* pointer (index -1 to end) in the IRQ stack: * the process stack normally) the last
*/ * pointer (index -1 to end) in the IRQ stack:
stack = (unsigned long *) (irq_stack_end[-1]); */
irq_stack_end = NULL; stack = (unsigned long *) (stack_end[-1]);
ops->stack(data, "EOI"); irq_stack = stack_end - irq_stack_size;
continue; ops->stack(data, "EOI");
} done = 0;
break;
case STACK_IS_UNKNOWN:
ops->stack(data, "UNK");
break;
} }
break;
} }
/* /*

View File

@ -55,16 +55,8 @@ static inline int check_stack_overflow(void) { return 0; }
static inline void print_stack_overflow(void) { } static inline void print_stack_overflow(void) { }
#endif #endif
/* DEFINE_PER_CPU(struct irq_stack *, hardirq_stack);
* per-CPU IRQ handling contexts (thread information and stack) DEFINE_PER_CPU(struct irq_stack *, softirq_stack);
*/
union irq_ctx {
struct thread_info tinfo;
u32 stack[THREAD_SIZE/sizeof(u32)];
} __attribute__((aligned(THREAD_SIZE)));
static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx);
static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx);
static void call_on_stack(void *func, void *stack) static void call_on_stack(void *func, void *stack)
{ {
@ -77,14 +69,26 @@ static void call_on_stack(void *func, void *stack)
: "memory", "cc", "edx", "ecx", "eax"); : "memory", "cc", "edx", "ecx", "eax");
} }
/*
 * How to get the current stack pointer from C.
 *
 * Statement-expression macro: copies %esp into a local with inline asm and
 * evaluates to that value as an unsigned long.
 */
#define current_stack_pointer ({ \
unsigned long sp; \
asm("mov %%esp,%0" : "=g" (sp)); \
sp; \
})
/*
 * Return the base of the stack currently in use: the current stack pointer
 * rounded down to a THREAD_SIZE boundary.
 */
static inline void *current_stack(void)
{
	unsigned long esp = current_stack_pointer;

	return (void *)(esp & ~(THREAD_SIZE - 1));
}
static inline int static inline int
execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
{ {
union irq_ctx *curctx, *irqctx; struct irq_stack *curstk, *irqstk;
u32 *isp, arg1, arg2; u32 *isp, *prev_esp, arg1, arg2;
curctx = (union irq_ctx *) current_thread_info(); curstk = (struct irq_stack *) current_stack();
irqctx = __this_cpu_read(hardirq_ctx); irqstk = __this_cpu_read(hardirq_stack);
/* /*
* this is where we switch to the IRQ stack. However, if we are * this is where we switch to the IRQ stack. However, if we are
@ -92,13 +96,14 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
* handler) we can't do that and just have to keep using the * handler) we can't do that and just have to keep using the
* current stack (which is the irq stack already after all) * current stack (which is the irq stack already after all)
*/ */
if (unlikely(curctx == irqctx)) if (unlikely(curstk == irqstk))
return 0; return 0;
/* build the stack frame on the IRQ stack */ isp = (u32 *) ((char *)irqstk + sizeof(*irqstk));
isp = (u32 *) ((char *)irqctx + sizeof(*irqctx));
irqctx->tinfo.task = curctx->tinfo.task; /* Save the next esp at the bottom of the stack */
irqctx->tinfo.previous_esp = current_stack_pointer; prev_esp = (u32 *)irqstk;
*prev_esp = current_stack_pointer;
if (unlikely(overflow)) if (unlikely(overflow))
call_on_stack(print_stack_overflow, isp); call_on_stack(print_stack_overflow, isp);
@ -118,46 +123,40 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
*/ */
void irq_ctx_init(int cpu) void irq_ctx_init(int cpu)
{ {
union irq_ctx *irqctx; struct irq_stack *irqstk;
if (per_cpu(hardirq_ctx, cpu)) if (per_cpu(hardirq_stack, cpu))
return; return;
irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), irqstk = page_address(alloc_pages_node(cpu_to_node(cpu),
THREADINFO_GFP, THREADINFO_GFP,
THREAD_SIZE_ORDER)); THREAD_SIZE_ORDER));
memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); per_cpu(hardirq_stack, cpu) = irqstk;
irqctx->tinfo.cpu = cpu;
irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
per_cpu(hardirq_ctx, cpu) = irqctx; irqstk = page_address(alloc_pages_node(cpu_to_node(cpu),
irqctx = page_address(alloc_pages_node(cpu_to_node(cpu),
THREADINFO_GFP, THREADINFO_GFP,
THREAD_SIZE_ORDER)); THREAD_SIZE_ORDER));
memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); per_cpu(softirq_stack, cpu) = irqstk;
irqctx->tinfo.cpu = cpu;
irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
per_cpu(softirq_ctx, cpu) = irqctx;
printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n", printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n",
cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu)); cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu));
} }
void do_softirq_own_stack(void) void do_softirq_own_stack(void)
{ {
struct thread_info *curctx; struct thread_info *curstk;
union irq_ctx *irqctx; struct irq_stack *irqstk;
u32 *isp; u32 *isp, *prev_esp;
curctx = current_thread_info(); curstk = current_stack();
irqctx = __this_cpu_read(softirq_ctx); irqstk = __this_cpu_read(softirq_stack);
irqctx->tinfo.task = curctx->task;
irqctx->tinfo.previous_esp = current_stack_pointer;
/* build the stack frame on the softirq stack */ /* build the stack frame on the softirq stack */
isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); isp = (u32 *) ((char *)irqstk + sizeof(*irqstk));
/* Push the previous esp onto the stack */
prev_esp = (u32 *)irqstk;
*prev_esp = current_stack_pointer;
call_on_stack(__do_softirq, isp); call_on_stack(__do_softirq, isp);
} }

View File

@ -314,6 +314,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/ */
arch_end_context_switch(next_p); arch_end_context_switch(next_p);
this_cpu_write(kernel_stack,
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE - KERNEL_STACK_OFFSET);
/* /*
* Restore %gs if needed (which is common) * Restore %gs if needed (which is common)
*/ */

View File

@ -184,14 +184,14 @@ unsigned long kernel_stack_pointer(struct pt_regs *regs)
{ {
unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1); unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1);
unsigned long sp = (unsigned long)&regs->sp; unsigned long sp = (unsigned long)&regs->sp;
struct thread_info *tinfo; u32 *prev_esp;
if (context == (sp & ~(THREAD_SIZE - 1))) if (context == (sp & ~(THREAD_SIZE - 1)))
return sp; return sp;
tinfo = (struct thread_info *)context; prev_esp = (u32 *)(context);
if (tinfo->previous_esp) if (prev_esp)
return tinfo->previous_esp; return (unsigned long)prev_esp;
return (unsigned long)regs; return (unsigned long)regs;
} }

View File

@ -766,10 +766,10 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
#else #else
clear_tsk_thread_flag(idle, TIF_FORK); clear_tsk_thread_flag(idle, TIF_FORK);
initial_gs = per_cpu_offset(cpu); initial_gs = per_cpu_offset(cpu);
#endif
per_cpu(kernel_stack, cpu) = per_cpu(kernel_stack, cpu) =
(unsigned long)task_stack_page(idle) - (unsigned long)task_stack_page(idle) -
KERNEL_STACK_OFFSET + THREAD_SIZE; KERNEL_STACK_OFFSET + THREAD_SIZE;
#endif
early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
initial_code = (unsigned long)start_secondary; initial_code = (unsigned long)start_secondary;
stack_start = idle->thread.sp; stack_start = idle->thread.sp;