The following execution path is possible:
  fsnotify()
    [ realign the stack and store previous SP in R10 ]
    <IRQ>
      [ only IRET regs saved ]
      common_interrupt()
        interrupt_entry()
	  <NMI>
	    [ full pt_regs saved ]
	    ...
	    [ unwind stack ]
When the unwinder goes through the NMI and the IRQ on the stack, and
then sees fsnotify(), it doesn't have access to the value of R10,
because it only has the five IRET registers.  So the unwind stops
prematurely.
However, because the interrupt_entry() code is careful not to clobber
R10 before saving the full regs, the unwinder should be able to read R10
from the previously saved full pt_regs associated with the NMI.
Handle this case properly.  When encountering an IRET regs frame
immediately after a full pt_regs frame, use the pt_regs as a backup
which can be used to get the C register values.
Also, note that a call frame resets the 'prev_regs' value, because a
function is free to clobber the registers.  For this fix to work, the
IRET and full regs frames must be adjacent, with no FUNC frames in
between.  So replace the FUNC hint in interrupt_entry() with an
IRET_REGS hint.
Fixes: ee9f8fce99 ("x86/unwind: Add the ORC unwinder")
Reviewed-by: Miroslav Benes <mbenes@suse.cz>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Dave Jones <dsj@fb.com>
Cc: Jann Horn <jannh@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: https://lore.kernel.org/r/97a408167cc09f1cfa0de31a7b70dd88868d743f.1587808742.git.jpoimboe@redhat.com
		
	
			
		
			
				
	
	
		
			127 lines
		
	
	
		
			3.1 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			127 lines
		
	
	
		
			3.1 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /* SPDX-License-Identifier: GPL-2.0 */
 | |
| #ifndef _ASM_X86_UNWIND_H
 | |
| #define _ASM_X86_UNWIND_H
 | |
| 
 | |
| #include <linux/sched.h>
 | |
| #include <linux/ftrace.h>
 | |
| #include <asm/ptrace.h>
 | |
| #include <asm/stacktrace.h>
 | |
| 
 | |
/*
 * Location and size of the trailing part of struct pt_regs that begins at
 * the 'ip' member: the offset is measured from the start of the structure,
 * the size runs from 'ip' to the end of the structure.
 */
#define IRET_FRAME_OFFSET	(offsetof(struct pt_regs, ip))
#define IRET_FRAME_SIZE		(sizeof(struct pt_regs) - IRET_FRAME_OFFSET)
 | |
| 
 | |
| struct unwind_state {
 | |
| 	struct stack_info stack_info;
 | |
| 	unsigned long stack_mask;
 | |
| 	struct task_struct *task;
 | |
| 	int graph_idx;
 | |
| 	bool error;
 | |
| #if defined(CONFIG_UNWINDER_ORC)
 | |
| 	bool signal, full_regs;
 | |
| 	unsigned long sp, bp, ip;
 | |
| 	struct pt_regs *regs, *prev_regs;
 | |
| #elif defined(CONFIG_UNWINDER_FRAME_POINTER)
 | |
| 	bool got_irq;
 | |
| 	unsigned long *bp, *orig_sp, ip;
 | |
| 	/*
 | |
| 	 * If non-NULL: The current frame is incomplete and doesn't contain a
 | |
| 	 * valid BP. When looking for the next frame, use this instead of the
 | |
| 	 * non-existent saved BP.
 | |
| 	 */
 | |
| 	unsigned long *next_bp;
 | |
| 	struct pt_regs *regs;
 | |
| #else
 | |
| 	unsigned long *sp;
 | |
| #endif
 | |
| };
 | |
| 
 | |
/*
 * Out-of-line unwinder entry points.  Only the declarations live in this
 * header; __unwind_start() is normally reached through the unwind_start()
 * inline wrapper, which fills in a missing first_frame.
 */
void __unwind_start(struct unwind_state *state,
		    struct task_struct *task,
		    struct pt_regs *regs,
		    unsigned long *first_frame);

bool unwind_next_frame(struct unwind_state *state);

unsigned long unwind_get_return_address(struct unwind_state *state);

unsigned long *unwind_get_return_address_ptr(struct unwind_state *state);
 | |
| 
 | |
| static inline bool unwind_done(struct unwind_state *state)
 | |
| {
 | |
| 	return state->stack_info.type == STACK_TYPE_UNKNOWN;
 | |
| }
 | |
| 
 | |
| static inline bool unwind_error(struct unwind_state *state)
 | |
| {
 | |
| 	return state->error;
 | |
| }
 | |
| 
 | |
/*
 * Begin a stack walk.
 *
 * @state:       unwind state handed on to __unwind_start()
 * @task:        task passed through to __unwind_start()
 * @regs:        register set passed through to __unwind_start()
 * @first_frame: starting frame; when NULL, the result of
 *               get_stack_pointer(task, regs) is used instead
 */
static inline
void unwind_start(struct unwind_state *state, struct task_struct *task,
		  struct pt_regs *regs, unsigned long *first_frame)
{
	/* No explicit starting frame: fall back to the stack pointer. */
	if (!first_frame)
		first_frame = get_stack_pointer(task, regs);

	__unwind_start(state, task, regs, first_frame);
}
 | |
| 
 | |
#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
/*
 * Fetch the pt_regs recorded in the unwind state.
 *
 * Returns NULL when the unwind is already done, otherwise state->regs.
 *
 * @partial is optional.  When it is non-NULL and the unwind is not done, it
 * is set to true if only the iret frame registers are valid: with the ORC
 * unwinder that is the inverse of state->full_regs, with the frame pointer
 * unwinder it is always false.  It is left untouched when the unwind is done.
 */
static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
						    bool *partial)
{
	if (unwind_done(state))
		return NULL;

	/* Caller did not ask for the 'partial' indication. */
	if (!partial)
		return state->regs;

#ifdef CONFIG_UNWINDER_ORC
	*partial = !state->full_regs;
#else
	*partial = false;
#endif

	return state->regs;
}
#else
/*
 * Neither the ORC nor the frame pointer unwinder is configured: there are
 * never entry regs to return, and @partial is not written.
 */
static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
						    bool *partial)
{
	return NULL;
}
#endif
 | |
| 
 | |
/*
 * Unwinder initialisation hooks.  With CONFIG_UNWINDER_ORC they are real
 * functions defined outside this header; in every other configuration they
 * are empty inline stubs, so callers need no conditional compilation.
 */
#if defined(CONFIG_UNWINDER_ORC)
void unwind_init(void);
void unwind_module_init(struct module *mod,
			void *orc_ip, size_t orc_ip_size,
			void *orc, size_t orc_size);
#else
static inline void unwind_init(void)
{
}

static inline
void unwind_module_init(struct module *mod,
			void *orc_ip, size_t orc_ip_size,
			void *orc, size_t orc_size)
{
}
#endif
 | |
| 
 | |
/*
 * Read one value from a task's stack.
 *
 * This disables KASAN checking when reading a value from another task's stack,
 * since the other task could be running on another CPU and could have poisoned
 * the stack in the meantime.  A read of the current task's stack goes through
 * plain READ_ONCE().
 *
 * @task: task that owns the stack.  Parenthesized in the expansion so that a
 *        compound expression (e.g. a conditional) compares as a whole against
 *        'current'.
 * @x:    lvalue to read.
 *
 * Evaluates to the value read, as an unsigned long.  The temporary carries a
 * trailing underscore so that an @x expression mentioning a caller variable
 * named 'val' is not captured by the macro's own local.
 */
#define READ_ONCE_TASK_STACK(task, x)			\
({							\
	unsigned long stack_val_;			\
	if ((task) == current)				\
		stack_val_ = READ_ONCE(x);		\
	else						\
		stack_val_ = READ_ONCE_NOCHECK(x);	\
	stack_val_;					\
})
 | |
| 
 | |
/*
 * Tell whether @task is a task other than 'current' whose on_cpu field is
 * set.
 *
 * Without CONFIG_SMP the answer is always false.
 */
static inline bool task_on_another_cpu(struct task_struct *task)
{
#ifdef CONFIG_SMP
	/* The current task is never "another" one. */
	if (task == current)
		return false;

	return task->on_cpu != 0;
#else
	return false;
#endif
}
 | |
| 
 | |
| #endif /* _ASM_X86_UNWIND_H */
 |