From 2432e1364bbee83cb6e63e026c91785031704ba5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 6 Feb 2014 09:41:28 -0500 Subject: [PATCH 1/6] x86: Nuke the supervisor_stack field in i386 thread_info Nothing references the supervisor_stack field in the i386 thread_info, and it does not exist on x86_64. To make the two more alike, remove it. Acked-by: Thomas Gleixner Cc: Andrew Morton Cc: Peter Zijlstra Cc: Brian Gerst Signed-off-by: Steven Rostedt Link: http://lkml.kernel.org/r/20110806012353.546183789@goodmis.org Link: http://lkml.kernel.org/r/20140206144321.203619611@goodmis.org Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/thread_info.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e1940c06ed02..b7aa975561e2 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -36,7 +36,6 @@ struct thread_info { unsigned long previous_esp; /* ESP of the previous stack in case of nested (IRQ) stacks */ - __u8 supervisor_stack[0]; #endif unsigned int sig_on_uaccess_error:1; unsigned int uaccess_err:1; /* uaccess failed */ From b807902a88c470eb06d0acdf3b6590f27f5dce81 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 6 Feb 2014 09:41:29 -0500 Subject: [PATCH 2/6] x86: Nuke GET_THREAD_INFO_WITH_ESP() macro for i386 According to a git log -p, GET_THREAD_INFO_WITH_ESP() has only ever been defined and never used. Get rid of it. Signed-off-by: Steven Rostedt Link: http://lkml.kernel.org/r/20140206144321.409045251@goodmis.org Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/thread_info.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index b7aa975561e2..1cb3501636d2 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -182,10 +182,6 @@ static inline struct thread_info *current_thread_info(void) movl $-THREAD_SIZE, reg; \ andl %esp, reg -/* use this one if reg already contains %esp */ -#define GET_THREAD_INFO_WITH_ESP(reg) \ - andl $-THREAD_SIZE, reg - #endif #else /* X86_32 */ From 0788aa6a23cb9d693fc5040ec774b979f1e906cd Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 6 Feb 2014 09:41:30 -0500 Subject: [PATCH 3/6] x86: Prepare removal of previous_esp from i386 thread_info structure The i386 thread_info contains a previous_esp field that is used to daisy-chain the different stacks for dump_stack() (i.e. the irq, softirq and thread stacks). The goal is to eventually make the i386 handling of thread_info the same as x86_64, which means that the thread_info will not be in the stack but in a per_cpu variable. We will no longer depend on thread_info being able to daisy-chain different stacks, as it will only exist in one location (the thread stack). Moving previous_esp to the end of thread_info and referencing it as an offset instead of through a thread_info field is a stepping stone toward moving the thread_info itself. The offset used to get to the previous stack is rather ugly in this patch, but this is only temporary; prev_esp will be changed in the next commit. This commit mainly serves as a sanity check for the change. Cc: Andrew Morton Cc: Peter Zijlstra Cc: Brian Gerst Cc: Robert Richter Signed-off-by: Steven Rostedt Link: http://lkml.kernel.org/r/20110806012353.891757693@goodmis.org Link: http://lkml.kernel.org/r/20140206144321.608754481@goodmis.org Signed-off-by: H.
Peter Anvin --- arch/x86/include/asm/thread_info.h | 5 +++-- arch/x86/kernel/dumpstack_32.c | 11 ++++++++++- arch/x86/kernel/irq_32.c | 15 +++++++++++---- arch/x86/kernel/ptrace.c | 8 ++++---- 4 files changed, 28 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 1cb3501636d2..ca2de1b48fac 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -32,13 +32,14 @@ struct thread_info { mm_segment_t addr_limit; struct restart_block restart_block; void __user *sysenter_return; + unsigned int sig_on_uaccess_error:1; + unsigned int uaccess_err:1; /* uaccess failed */ #ifdef CONFIG_X86_32 unsigned long previous_esp; /* ESP of the previous stack in case of nested (IRQ) stacks + (Moved to end, to be removed soon) */ #endif - unsigned int sig_on_uaccess_error:1; - unsigned int uaccess_err:1; /* uaccess failed */ }; #define INIT_THREAD_INFO(tsk) \ diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index f2a1770ca176..187d6a749c19 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -22,6 +22,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, const struct stacktrace_ops *ops, void *data) { int graph = 0; + u32 *prev_esp; if (!task) task = current; @@ -44,9 +45,17 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, ((unsigned long)stack & (~(THREAD_SIZE - 1))); bp = ops->walk_stack(context, stack, bp, ops, data, NULL, &graph); - stack = (unsigned long *)context->previous_esp; + /* Stop if not on irq stack */ + if (task_stack_page(task) == context) + break; + + /* The previous esp is just above the context */ + prev_esp = (u32 *) ((char *)context + sizeof(struct thread_info) - + sizeof(long)); + stack = (unsigned long *)*prev_esp; if (!stack) break; + if (ops->stack(data, "IRQ") < 0) break; touch_nmi_watchdog(); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index d7fcbedc9c43..f135cc2ff301 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -81,7 +81,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { union irq_ctx *curctx, *irqctx; - u32 *isp, arg1, arg2; + u32 *isp, *prev_esp, arg1, arg2; curctx = (union irq_ctx *) current_thread_info(); irqctx = __this_cpu_read(hardirq_ctx); @@ -98,7 +98,10 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) /* build the stack frame on the IRQ stack */ isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); irqctx->tinfo.task = curctx->tinfo.task; - irqctx->tinfo.previous_esp = current_stack_pointer; + /* Save the next esp after thread_info */ + prev_esp = (u32 *) ((char *)irqctx + sizeof(struct thread_info) - + sizeof(long)); + *prev_esp = current_stack_pointer; if (unlikely(overflow)) call_on_stack(print_stack_overflow, isp); @@ -149,16 +152,20 @@ void do_softirq_own_stack(void) { struct thread_info *curctx; union irq_ctx *irqctx; - u32 *isp; + u32 *isp, *prev_esp; curctx = current_thread_info(); irqctx = __this_cpu_read(softirq_ctx); irqctx->tinfo.task = curctx->task; - irqctx->tinfo.previous_esp = current_stack_pointer; /* build the stack frame on the softirq stack */ isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); + /* Push the previous esp onto the stack */ + prev_esp = (u32 *) ((char *)irqctx + sizeof(struct thread_info) - + sizeof(long)); + *prev_esp = current_stack_pointer; + call_on_stack(__do_softirq, isp); } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c 
index 7461f50d5bb1..f352a7cc43a1 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -184,14 +184,14 @@ unsigned long kernel_stack_pointer(struct pt_regs *regs) { unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1); unsigned long sp = (unsigned long)&regs->sp; - struct thread_info *tinfo; + u32 *prev_esp; if (context == (sp & ~(THREAD_SIZE - 1))) return sp; - tinfo = (struct thread_info *)context; - if (tinfo->previous_esp) - return tinfo->previous_esp; + prev_esp = (u32 *)(context + sizeof(struct thread_info) - sizeof(long)); + if (prev_esp) + return (unsigned long)prev_esp; return (unsigned long)regs; } From 198d208df4371734ac4728f69cb585c284d20a15 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 6 Feb 2014 09:41:31 -0500 Subject: [PATCH 4/6] x86: Keep thread_info on thread stack in x86_32 x86_64 uses a per_cpu variable kernel_stack to always point to the thread stack of current. This is where the thread_info is stored, and it is accessed from this location even when the irq or exception stack is in use. This removes the complexity of having to maintain the thread_info on the stack when interrupts are running and having to copy the preempt_count and other fields to the interrupt stack. x86_32 uses the old method of copying the thread_info from the thread stack to the exception stack just before executing the exception. Having the two be different requires #ifdefs, and the x86_32 way is also a bit of a pain to maintain. By converting x86_32 to the same method as x86_64, we can remove the #ifdefs, clean up the x86_32 code a little, and remove the overhead of the copy. Cc: Andrew Morton Cc: Peter Zijlstra Cc: Brian Gerst Signed-off-by: Steven Rostedt Link: http://lkml.kernel.org/r/20110806012354.263834829@goodmis.org Link: http://lkml.kernel.org/r/20140206144321.852942014@goodmis.org Signed-off-by: H.
Peter Anvin --- arch/x86/include/asm/processor.h | 9 ++++ arch/x86/include/asm/thread_info.h | 49 ++------------------ arch/x86/kernel/cpu/common.c | 8 ++-- arch/x86/kernel/dumpstack_32.c | 41 ++++++++++++++--- arch/x86/kernel/irq_32.c | 74 +++++++++++++----------------- arch/x86/kernel/process_32.c | 4 ++ arch/x86/kernel/ptrace.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- 8 files changed, 89 insertions(+), 100 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index fdedd38fd0fc..a4ea02351f4d 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -449,6 +449,15 @@ struct stack_canary { }; DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); #endif +/* + * per-CPU IRQ handling stacks + */ +struct irq_stack { + u32 stack[THREAD_SIZE/sizeof(u32)]; +} __aligned(THREAD_SIZE); + +DECLARE_PER_CPU(struct irq_stack *, hardirq_stack); +DECLARE_PER_CPU(struct irq_stack *, softirq_stack); #endif /* X86_64 */ extern unsigned int xstate_size; diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index ca2de1b48fac..47e5de25ba79 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -9,6 +9,7 @@ #include #include +#include #include /* @@ -34,12 +35,6 @@ struct thread_info { void __user *sysenter_return; unsigned int sig_on_uaccess_error:1; unsigned int uaccess_err:1; /* uaccess failed */ -#ifdef CONFIG_X86_32 - unsigned long previous_esp; /* ESP of the previous stack in - case of nested (IRQ) stacks - (Moved to end, to be removed soon) - */ -#endif }; #define INIT_THREAD_INFO(tsk) \ @@ -153,9 +148,9 @@ struct thread_info { #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) -#ifdef CONFIG_X86_32 +#define STACK_WARN (THREAD_SIZE/8) +#define KERNEL_STACK_OFFSET (5*(BITS_PER_LONG/8)) -#define STACK_WARN (THREAD_SIZE/8) /* * macros/functions for gaining access to the thread information structure * @@ -163,38 +158,6 @@ struct thread_info { */ #ifndef __ASSEMBLY__ -#define current_stack_pointer ({ \ - unsigned long sp; \ - asm("mov %%esp,%0" : "=g" (sp)); \ - sp; \ -}) - -/* how to get the thread information struct from C */ -static inline struct thread_info *current_thread_info(void) -{ - return (struct thread_info *) - (current_stack_pointer & ~(THREAD_SIZE - 1)); -} - -#else /* !__ASSEMBLY__ */ - -/* how to get the thread information struct from ASM */ -#define GET_THREAD_INFO(reg) \ - movl $-THREAD_SIZE, reg; \ - andl %esp, reg - -#endif - -#else /* X86_32 */ - -#include -#define KERNEL_STACK_OFFSET (5*8) - -/* - * macros/functions for gaining access to the thread information structure - * preempt_count needs to be 1 initially, until the scheduler is functional. - */ -#ifndef __ASSEMBLY__ DECLARE_PER_CPU(unsigned long, kernel_stack); static inline struct thread_info *current_thread_info(void) @@ -209,8 +172,8 @@ static inline struct thread_info *current_thread_info(void) /* how to get the thread information struct from ASM */ #define GET_THREAD_INFO(reg) \ - movq PER_CPU_VAR(kernel_stack),reg ; \ - subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg + _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \ + _ASM_SUB $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg ; /* * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in @@ -220,8 +183,6 @@ static inline struct thread_info *current_thread_info(void) #endif -#endif /* !X86_32 */ - /* * Thread-synchronous status. 
* diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8e28bf2fc3ef..29c1944a98ac 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1078,6 +1078,10 @@ static __init int setup_disablecpuid(char *arg) } __setup("clearcpuid=", setup_disablecpuid); +DEFINE_PER_CPU(unsigned long, kernel_stack) = + (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; +EXPORT_PER_CPU_SYMBOL(kernel_stack); + #ifdef CONFIG_X86_64 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1, @@ -1094,10 +1098,6 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); -DEFINE_PER_CPU(unsigned long, kernel_stack) = - (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; -EXPORT_PER_CPU_SYMBOL(kernel_stack); - DEFINE_PER_CPU(char *, irq_stack_ptr) = init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 187d6a749c19..dca820b627d6 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -16,11 +16,33 @@ #include +static void *is_irq_stack(void *p, void *irq) +{ + if (p < irq || p >= (irq + THREAD_SIZE)) + return NULL; + return irq + THREAD_SIZE; +} + + +static void *is_hardirq_stack(unsigned long *stack, int cpu) +{ + void *irq = per_cpu(hardirq_stack, cpu); + + return is_irq_stack(stack, irq); +} + +static void *is_softirq_stack(unsigned long *stack, int cpu) +{ + void *irq = per_cpu(softirq_stack, cpu); + + return is_irq_stack(stack, irq); +} void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data) { + const unsigned cpu = get_cpu(); int graph = 0; u32 *prev_esp; @@ -40,18 +62,22 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, for (;;) { struct thread_info *context; + void *end_stack; - context = (struct thread_info *) - ((unsigned long)stack & (~(THREAD_SIZE - 1))); - bp = ops->walk_stack(context, stack, bp, ops, data, NULL, &graph); + end_stack = is_hardirq_stack(stack, cpu); + if (!end_stack) + end_stack = is_softirq_stack(stack, cpu); + + context = task_thread_info(task); + bp = ops->walk_stack(context, stack, bp, ops, data, + end_stack, &graph); /* Stop if not on irq stack */ - if (task_stack_page(task) == context) + if (!end_stack) break; - /* The previous esp is just above the context */ - prev_esp = (u32 *) ((char *)context + sizeof(struct thread_info) - - sizeof(long)); + /* The previous esp is saved on the bottom of the stack */ + prev_esp = (u32 *)(end_stack - THREAD_SIZE); stack = (unsigned long *)*prev_esp; if (!stack) break; @@ -60,6 +86,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, break; touch_nmi_watchdog(); } + put_cpu(); } EXPORT_SYMBOL(dump_trace); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index f135cc2ff301..988dc8bcaebf 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -55,16 +55,8 @@ static inline int check_stack_overflow(void) { return 0; } static inline void print_stack_overflow(void) { } #endif -/* - * per-CPU IRQ handling contexts (thread information and stack) - */ -union irq_ctx { - struct thread_info tinfo; - u32 stack[THREAD_SIZE/sizeof(u32)]; -} __attribute__((aligned(THREAD_SIZE))); - -static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx); -static 
DEFINE_PER_CPU(union irq_ctx *, softirq_ctx); +DEFINE_PER_CPU(struct irq_stack *, hardirq_stack); +DEFINE_PER_CPU(struct irq_stack *, softirq_stack); static void call_on_stack(void *func, void *stack) { @@ -77,14 +69,22 @@ static void call_on_stack(void *func, void *stack) : "memory", "cc", "edx", "ecx", "eax"); } +/* how to get the current stack pointer from C */ +register unsigned long current_stack_pointer asm("esp") __used; + +static inline void *current_stack(void) +{ + return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1)); +} + static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { - union irq_ctx *curctx, *irqctx; + struct irq_stack *curstk, *irqstk; u32 *isp, *prev_esp, arg1, arg2; - curctx = (union irq_ctx *) current_thread_info(); - irqctx = __this_cpu_read(hardirq_ctx); + curstk = (struct irq_stack *) current_stack(); + irqstk = __this_cpu_read(hardirq_stack); /* * this is where we switch to the IRQ stack. However, if we are @@ -92,15 +92,13 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) * handler) we can't do that and just have to keep using the * current stack (which is the irq stack already after all) */ - if (unlikely(curctx == irqctx)) + if (unlikely(curstk == irqstk)) return 0; - /* build the stack frame on the IRQ stack */ - isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); - irqctx->tinfo.task = curctx->tinfo.task; - /* Save the next esp after thread_info */ - prev_esp = (u32 *) ((char *)irqctx + sizeof(struct thread_info) - - sizeof(long)); + isp = (u32 *) ((char *)irqstk + sizeof(*irqstk)); + + /* Save the next esp at the bottom of the stack */ + prev_esp = (u32 *)irqstk; *prev_esp = current_stack_pointer; if (unlikely(overflow)) @@ -121,49 +119,39 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) */ void irq_ctx_init(int cpu) { - union irq_ctx *irqctx; + struct irq_stack *irqstk; - if (per_cpu(hardirq_ctx, cpu)) + if (per_cpu(hardirq_stack, cpu)) return; - irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), + irqstk = page_address(alloc_pages_node(cpu_to_node(cpu), THREADINFO_GFP, THREAD_SIZE_ORDER)); - memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); - irqctx->tinfo.cpu = cpu; - irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); + per_cpu(hardirq_stack, cpu) = irqstk; - per_cpu(hardirq_ctx, cpu) = irqctx; - - irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), + irqstk = page_address(alloc_pages_node(cpu_to_node(cpu), THREADINFO_GFP, THREAD_SIZE_ORDER)); - memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); - irqctx->tinfo.cpu = cpu; - irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); - - per_cpu(softirq_ctx, cpu) = irqctx; + per_cpu(softirq_stack, cpu) = irqstk; printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n", - cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu)); + cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu)); } void do_softirq_own_stack(void) { - struct thread_info *curctx; - union irq_ctx *irqctx; + struct thread_info *curstk; + struct irq_stack *irqstk; u32 *isp, *prev_esp; - curctx = current_thread_info(); - irqctx = __this_cpu_read(softirq_ctx); - irqctx->tinfo.task = curctx->task; + curstk = current_stack(); + irqstk = __this_cpu_read(softirq_stack); /* build the stack frame on the softirq stack */ - isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); + isp = (u32 *) ((char *)irqstk + sizeof(*irqstk)); /* Push the previous esp onto the stack */ - prev_esp = (u32 *) ((char *)irqctx + sizeof(struct thread_info) - - sizeof(long)); + prev_esp = (u32 
*)irqstk; *prev_esp = current_stack_pointer; call_on_stack(__do_softirq, isp); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 0de43e98ce08..7bc86bbe7485 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -314,6 +314,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ arch_end_context_switch(next_p); + this_cpu_write(kernel_stack, + (unsigned long)task_stack_page(next_p) + + THREAD_SIZE - KERNEL_STACK_OFFSET); + /* * Restore %gs if needed (which is common) */ diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index f352a7cc43a1..678c0ada3b3c 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -189,7 +189,7 @@ unsigned long kernel_stack_pointer(struct pt_regs *regs) if (context == (sp & ~(THREAD_SIZE - 1))) return sp; - prev_esp = (u32 *)(context + sizeof(struct thread_info) - sizeof(long)); + prev_esp = (u32 *)(context); if (prev_esp) return (unsigned long)prev_esp; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a32da804252e..867d53ea88a3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -758,10 +758,10 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) #else clear_tsk_thread_flag(idle, TIF_FORK); initial_gs = per_cpu_offset(cpu); +#endif per_cpu(kernel_stack, cpu) = (unsigned long)task_stack_page(idle) - KERNEL_STACK_OFFSET + THREAD_SIZE; -#endif early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); initial_code = (unsigned long)start_secondary; stack_start = idle->thread.sp; From 2223f6f6eeaad6ea0a3acd3f4bc1ae45e8117ddc Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 6 Feb 2014 09:41:32 -0500 Subject: [PATCH 5/6] x86: Clean up dumpstack_64.c code The dump_trace() function in dumpstack_64.c is hard to follow. The exception stack test is handled differently from the irq stack test, and the normal stack is handled outside of both entirely. Restructure this code so that the stack type is determined by a single function that returns one of the following: STACK_IS_NORMAL, STACK_IS_EXCEPTION, STACK_IS_IRQ, STACK_IS_UNKNOWN, and handle the logic for each case within a switch statement. This should make the code much easier to read and understand. Link: http://lkml.kernel.org/r/20110806012354.684598995@goodmis.org Cc: Andrew Morton Cc: Peter Zijlstra Cc: Brian Gerst Signed-off-by: Steven Rostedt Link: http://lkml.kernel.org/r/20140206144322.086050042@goodmis.org Signed-off-by: H.
Peter Anvin --- arch/x86/kernel/dumpstack_64.c | 115 +++++++++++++++++++++++---------- 1 file changed, 82 insertions(+), 33 deletions(-) diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index addb207dab92..346b1df2412e 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -104,6 +104,45 @@ in_irq_stack(unsigned long *stack, unsigned long *irq_stack, return (stack >= irq_stack && stack < irq_stack_end); } +static const unsigned long irq_stack_size = + (IRQ_STACK_SIZE - 64) / sizeof(unsigned long); + +enum stack_type { + STACK_IS_UNKNOWN, + STACK_IS_NORMAL, + STACK_IS_EXCEPTION, + STACK_IS_IRQ, +}; + +static enum stack_type +analyze_stack(int cpu, struct task_struct *task, + unsigned long *stack, unsigned long **stack_end, char **id) +{ + unsigned long *irq_stack; + unsigned long addr; + unsigned used = 0; + + addr = ((unsigned long)stack & (~(THREAD_SIZE - 1))); + if ((unsigned long)task_stack_page(task) == addr) + return STACK_IS_NORMAL; + + *stack_end = in_exception_stack(cpu, (unsigned long)stack, + &used, id); + if (*stack_end) + return STACK_IS_EXCEPTION; + + *stack_end = (unsigned long *)per_cpu(irq_stack_ptr, cpu); + if (!*stack_end) + return STACK_IS_UNKNOWN; + + irq_stack = *stack_end - irq_stack_size; + + if (in_irq_stack(stack, irq_stack, *stack_end)) + return STACK_IS_IRQ; + + return STACK_IS_UNKNOWN; +} + /* * x86-64 can have up to three kernel stacks: * process stack @@ -116,12 +155,11 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); - unsigned long *irq_stack_end = - (unsigned long *)per_cpu(irq_stack_ptr, cpu); - unsigned used = 0; struct thread_info *tinfo; - int graph = 0; + unsigned long *irq_stack; unsigned long dummy; + int graph = 0; + int done = 0; if (!task) task = current; @@ -143,49 +181,60 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, * exceptions */ tinfo = task_thread_info(task); - for (;;) { + while (!done) { + unsigned long *stack_end; + enum stack_type stype; char *id; - unsigned long *estack_end; - estack_end = in_exception_stack(cpu, (unsigned long)stack, - &used, &id); - if (estack_end) { + stype = analyze_stack(cpu, task, stack, &stack_end, &id); + + /* Default finish unless specified to continue */ + done = 1; + + switch (stype) { + + /* Break out early if we are on the thread stack */ + case STACK_IS_NORMAL: + break; + + case STACK_IS_EXCEPTION: + if (ops->stack(data, id) < 0) break; bp = ops->walk_stack(tinfo, stack, bp, ops, - data, estack_end, &graph); + data, stack_end, &graph); ops->stack(data, ""); /* * We link to the next stack via the * second-to-last pointer (index -2 to end) in the * exception stack: */ - stack = (unsigned long *) estack_end[-2]; - continue; - } - if (irq_stack_end) { - unsigned long *irq_stack; - irq_stack = irq_stack_end - - (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack); + stack = (unsigned long *) stack_end[-2]; + done = 0; + break; - if (in_irq_stack(stack, irq_stack, irq_stack_end)) { - if (ops->stack(data, "IRQ") < 0) - break; - bp = ops->walk_stack(tinfo, stack, bp, - ops, data, irq_stack_end, &graph); - /* - * We link to the next stack (which would be - * the process stack normally) the last - * pointer (index -1 to end) in the IRQ stack: - */ - stack = (unsigned long *) (irq_stack_end[-1]); - irq_stack_end = NULL; - ops->stack(data, "EOI"); - continue; - } + case STACK_IS_IRQ: + + if (ops->stack(data, "IRQ") < 0) + break; + bp = 
ops->walk_stack(tinfo, stack, bp, + ops, data, stack_end, &graph); + /* + * We link to the next stack (which would be + * the process stack normally) the last + * pointer (index -1 to end) in the IRQ stack: + */ + stack = (unsigned long *) (stack_end[-1]); + irq_stack = stack_end - irq_stack_size; + ops->stack(data, "EOI"); + done = 0; + break; + + case STACK_IS_UNKNOWN: + ops->stack(data, "UNK"); + break; } - break; } /* From 6cce16f99d7be23cec7cabdf32a8166eec6e5393 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Fri, 7 Mar 2014 08:52:32 +0100 Subject: [PATCH 6/6] x86, threadinfo: Redo "x86: Use inline assembler to get sp" This patch restores the changes of commit dff38e3e93 "x86: Use inline assembler instead of global register variable to get sp". They got lost in commit 198d208df4 "x86: Keep thread_info on thread stack in x86_32" while moving the code to arch/x86/kernel/irq_32.c. Quoting Andi from commit dff38e3e93: """ LTO in gcc 4.6/47. has trouble with global register variables. They were used to read the stack pointer. Use a simple inline assembler statement with a mov instead. This also helps LLVM/clang, which does not support global register variables. """ Cc: Steven Rostedt Cc: Andrew Morton Cc: Peter Zijlstra Cc: Brian Gerst Cc: Andi Kleen Signed-off-by: Mathias Krause Link: http://lkml.kernel.org/r/1394178752-18047-1-git-send-email-minipli@googlemail.com Signed-off-by: H. Peter Anvin --- arch/x86/kernel/irq_32.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 988dc8bcaebf..63ce838e5a54 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -70,7 +70,11 @@ static void call_on_stack(void *func, void *stack) } /* how to get the current stack pointer from C */ -register unsigned long current_stack_pointer asm("esp") __used; +#define current_stack_pointer ({ \ + unsigned long sp; \ + asm("mov %%esp,%0" : "=g" (sp)); \ + sp; \ +}) static inline void *current_stack(void) {
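/*
 * A minimal standalone sketch, separate from the patches above, of the
 * bookkeeping this series introduces: stacks are THREAD_SIZE aligned,
 * and the interrupted stack pointer ("prev_esp") is saved in the word
 * at the bottom of the irq stack rather than in a thread_info field,
 * so the unwinder can daisy-chain from the irq stack back to the stack
 * it interrupted.  This is a userspace model only; the THREAD_SIZE
 * value and the names stack_base/task_stk/irq_stk are assumptions for
 * the demo, not kernel API.
 */
#include <stdio.h>
#include <stdlib.h>

#define THREAD_SIZE (2 * 4096)		/* 8K stacks, as on i386 */

/* base of the THREAD_SIZE-aligned stack containing sp (cf. current_stack()) */
static void *stack_base(unsigned long sp)
{
	return (void *)(sp & ~(THREAD_SIZE - 1UL));
}

int main(void)
{
	/* stand-ins for the task stack and the per-CPU hardirq stack */
	void *task_stk = aligned_alloc(THREAD_SIZE, THREAD_SIZE);
	void *irq_stk  = aligned_alloc(THREAD_SIZE, THREAD_SIZE);
	unsigned long interrupted_sp, *prev_esp, saved;

	if (!task_stk || !irq_stk)
		return 1;

	/* pretend the task was interrupted part-way down its stack */
	interrupted_sp = (unsigned long)task_stk + THREAD_SIZE - 128;

	/* what execute_on_irq_stack() now does: save the old stack pointer
	   in the first word of the irq stack (the kernel uses u32 here,
	   since this path is i386 only) */
	prev_esp = irq_stk;
	*prev_esp = interrupted_sp;

	/* what dump_trace() now does: from any pointer into the irq stack,
	   mask down to the stack base and read prev_esp to find the stack
	   the irq came from */
	saved = *(unsigned long *)stack_base((unsigned long)irq_stk +
					     THREAD_SIZE - 64);

	printf("interrupted sp %#lx, recovered sp %#lx\n",
	       interrupted_sp, saved);
	printf("previous stack base %p (task stack %p)\n",
	       stack_base(saved), task_stk);

	free(task_stk);
	free(irq_stk);
	return 0;
}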