2019-06-03 05:44:50 +00:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
2012-03-05 11:49:27 +00:00
|
|
|
/*
|
|
|
|
* Stack tracing support
|
|
|
|
*
|
|
|
|
* Copyright (C) 2012 ARM Ltd.
|
|
|
|
*/
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/export.h>
|
2015-12-15 08:33:41 +00:00
|
|
|
#include <linux/ftrace.h>
|
2019-07-25 08:16:05 +00:00
|
|
|
#include <linux/kprobes.h>
|
2012-03-05 11:49:27 +00:00
|
|
|
#include <linux/sched.h>
|
2017-02-08 17:51:35 +00:00
|
|
|
#include <linux/sched/debug.h>
|
2017-02-08 17:51:37 +00:00
|
|
|
#include <linux/sched/task_stack.h>
|
2012-03-05 11:49:27 +00:00
|
|
|
#include <linux/stacktrace.h>
|
|
|
|
|
2015-12-04 11:02:26 +00:00
|
|
|
#include <asm/irq.h>
|
2020-03-13 09:04:59 +00:00
|
|
|
#include <asm/pointer_auth.h>
|
2016-11-03 20:23:05 +00:00
|
|
|
#include <asm/stack_pointer.h>
|
2012-03-05 11:49:27 +00:00
|
|
|
#include <asm/stacktrace.h>
|
|
|
|
|
|
|
|
/*
|
|
|
|
* AArch64 PCS assigns the frame pointer to x29.
|
|
|
|
*
|
|
|
|
* A simple function prologue looks like this:
|
|
|
|
* sub sp, sp, #0x10
|
|
|
|
* stp x29, x30, [sp]
|
|
|
|
* mov x29, sp
|
|
|
|
*
|
|
|
|
* A simple function epilogue looks like this:
|
|
|
|
* mov sp, x29
|
|
|
|
* ldp x29, x30, [sp]
|
|
|
|
* add sp, sp, #0x10
|
|
|
|
*/
|
2019-07-02 13:07:29 +00:00
|
|
|
|
2021-03-19 17:40:22 +00:00
|
|
|
|
|
|
|
void start_backtrace(struct stackframe *frame, unsigned long fp,
|
|
|
|
unsigned long pc)
|
|
|
|
{
|
|
|
|
frame->fp = fp;
|
|
|
|
frame->pc = pc;
|
|
|
|
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
|
|
|
frame->graph = 0;
|
|
|
|
#endif
|
2021-10-21 00:55:09 +00:00
|
|
|
#ifdef CONFIG_KRETPROBES
|
|
|
|
frame->kr_cur = NULL;
|
|
|
|
#endif
|
2021-03-19 17:40:22 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Prime the first unwind.
|
|
|
|
*
|
|
|
|
* In unwind_frame() we'll check that the FP points to a valid stack,
|
|
|
|
* which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
|
|
|
|
* treated as a transition to whichever stack that happens to be. The
|
|
|
|
* prev_fp value won't be used, but we set it to 0 such that it is
|
|
|
|
* definitely not an accessible stack address.
|
|
|
|
*/
|
|
|
|
bitmap_zero(frame->stacks_done, __NR_STACK_TYPES);
|
|
|
|
frame->prev_fp = 0;
|
|
|
|
frame->prev_type = STACK_TYPE_UNKNOWN;
|
|
|
|
}
|
|
|
|
|
2019-07-02 13:07:29 +00:00
|
|
|
/*
|
|
|
|
* Unwind from one frame record (A) to the next frame record (B).
|
|
|
|
*
|
|
|
|
* We terminate early if the location of B indicates a malformed chain of frame
|
|
|
|
* records (e.g. a cycle), determined based on the location and fp value of A
|
|
|
|
* and the location (but not the fp value) of B.
|
|
|
|
*/
|
2015-12-15 08:33:40 +00:00
|
|
|
int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
|
2012-03-05 11:49:27 +00:00
|
|
|
{
|
|
|
|
unsigned long fp = frame->fp;
|
2019-07-02 13:07:29 +00:00
|
|
|
struct stack_info info;
|
2017-07-22 11:48:34 +00:00
|
|
|
|
arm64: fix dump_backtrace/unwind_frame with NULL tsk
In some places, dump_backtrace() is called with a NULL tsk parameter,
e.g. in bug_handler() in arch/arm64, or indirectly via show_stack() in
core code. The expectation is that this is treated as if current were
passed instead of NULL. Similar is true of unwind_frame().
Commit a80a0eb70c358f8c ("arm64: make irq_stack_ptr more robust") didn't
take this into account. In dump_backtrace() it compares tsk against
current *before* we check if tsk is NULL, and in unwind_frame() we never
set tsk if it is NULL.
Due to this, we won't initialise irq_stack_ptr in either function. In
dump_backtrace() this results in calling dump_mem() for memory
immediately above the IRQ stack range, rather than for the relevant
range on the task stack. In unwind_frame we'll reject unwinding frames
on the IRQ stack.
In either case this results in incomplete or misleading backtrace
information, but is not otherwise problematic. The initial percpu areas
(including the IRQ stacks) are allocated in the linear map, and dump_mem
uses __get_user(), so we shouldn't access anything with side-effects,
and will handle holes safely.
This patch fixes the issue by having both functions handle the NULL tsk
case before doing anything else with tsk.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Fixes: a80a0eb70c358f8c ("arm64: make irq_stack_ptr more robust")
Acked-by: James Morse <james.morse@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Yang Shi <yang.shi@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
2016-09-23 16:55:05 +00:00
|
|
|
if (!tsk)
|
|
|
|
tsk = current;
|
|
|
|
|
arm64: Implement stack trace termination record
Reliable stacktracing requires that we identify when a stacktrace is
terminated early. We can do this by ensuring all tasks have a final
frame record at a known location on their task stack, and checking
that this is the final frame record in the chain.
We'd like to use task_pt_regs(task)->stackframe as the final frame
record, as this is already setup upon exception entry from EL0. For
kernel tasks we need to consistently reserve the pt_regs and point x29
at this, which we can do with small changes to __primary_switched,
__secondary_switched, and copy_process().
Since the final frame record must be at a specific location, we must
create the final frame record in __primary_switched and
__secondary_switched rather than leaving this to start_kernel and
secondary_start_kernel. Thus, __primary_switched and
__secondary_switched will now show up in stacktraces for the idle tasks.
Since the final frame record is now identified by its location rather
than by its contents, we identify it at the start of unwind_frame(),
before we read any values from it.
External debuggers may terminate the stack trace when FP == 0. In the
pt_regs->stackframe, the PC is 0 as well. So, stack traces taken in the
debugger may print an extra record 0x0 at the end. While this is not
pretty, this does not do any harm. This is a small price to pay for
having reliable stack trace termination in the kernel. That said, gdb
does not show the extra record probably because it uses DWARF and not
frame pointers for stack traces.
Signed-off-by: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
[Mark: rebase, use ASM_BUG(), update comments, update commit message]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210510110026.18061-1-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
2021-05-10 11:00:26 +00:00
|
|
|
/* Final frame; nothing to unwind */
|
|
|
|
if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
|
|
|
|
return -ENOENT;
|
|
|
|
|
2021-05-26 17:49:26 +00:00
|
|
|
if (fp & 0x7)
|
arm64: Implement stack trace termination record
Reliable stacktracing requires that we identify when a stacktrace is
terminated early. We can do this by ensuring all tasks have a final
frame record at a known location on their task stack, and checking
that this is the final frame record in the chain.
We'd like to use task_pt_regs(task)->stackframe as the final frame
record, as this is already setup upon exception entry from EL0. For
kernel tasks we need to consistently reserve the pt_regs and point x29
at this, which we can do with small changes to __primary_switched,
__secondary_switched, and copy_process().
Since the final frame record must be at a specific location, we must
create the final frame record in __primary_switched and
__secondary_switched rather than leaving this to start_kernel and
secondary_start_kernel. Thus, __primary_switched and
__secondary_switched will now show up in stacktraces for the idle tasks.
Since the final frame record is now identified by its location rather
than by its contents, we identify it at the start of unwind_frame(),
before we read any values from it.
External debuggers may terminate the stack trace when FP == 0. In the
pt_regs->stackframe, the PC is 0 as well. So, stack traces taken in the
debugger may print an extra record 0x0 at the end. While this is not
pretty, this does not do any harm. This is a small price to pay for
having reliable stack trace termination in the kernel. That said, gdb
does not show the extra record probably because it uses DWARF and not
frame pointers for stack traces.
Signed-off-by: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
[Mark: rebase, use ASM_BUG(), update comments, update commit message]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210510110026.18061-1-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
2021-05-10 11:00:26 +00:00
|
|
|
return -EINVAL;
|
|
|
|
|
2021-05-26 17:49:25 +00:00
|
|
|
if (!on_accessible_stack(tsk, fp, 16, &info))
|
2012-03-05 11:49:27 +00:00
|
|
|
return -EINVAL;
|
|
|
|
|
2019-07-02 13:07:29 +00:00
|
|
|
if (test_bit(info.type, frame->stacks_done))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* As stacks grow downward, any valid record on the same stack must be
|
|
|
|
* at a strictly higher address than the prior record.
|
|
|
|
*
|
|
|
|
* Stacks can nest in several valid orders, e.g.
|
|
|
|
*
|
|
|
|
* TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
|
|
|
|
* TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
|
|
|
|
*
|
|
|
|
* ... but the nesting itself is strict. Once we transition from one
|
|
|
|
* stack to another, it's never valid to unwind back to that first
|
|
|
|
* stack.
|
|
|
|
*/
|
|
|
|
if (info.type == frame->prev_type) {
|
|
|
|
if (fp <= frame->prev_fp)
|
|
|
|
return -EINVAL;
|
|
|
|
} else {
|
|
|
|
set_bit(frame->prev_type, frame->stacks_done);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Record this frame record's values and location. The prev_fp and
|
|
|
|
* prev_type are only meaningful to the next unwind_frame() invocation.
|
|
|
|
*/
|
2016-02-08 17:13:09 +00:00
|
|
|
frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
|
|
|
|
frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
|
2019-07-02 13:07:29 +00:00
|
|
|
frame->prev_fp = fp;
|
|
|
|
frame->prev_type = info.type;
|
2012-03-05 11:49:27 +00:00
|
|
|
|
2015-12-15 08:33:41 +00:00
|
|
|
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
arm64: fix dump_backtrace/unwind_frame with NULL tsk
In some places, dump_backtrace() is called with a NULL tsk parameter,
e.g. in bug_handler() in arch/arm64, or indirectly via show_stack() in
core code. The expectation is that this is treated as if current were
passed instead of NULL. Similar is true of unwind_frame().
Commit a80a0eb70c358f8c ("arm64: make irq_stack_ptr more robust") didn't
take this into account. In dump_backtrace() it compares tsk against
current *before* we check if tsk is NULL, and in unwind_frame() we never
set tsk if it is NULL.
Due to this, we won't initialise irq_stack_ptr in either function. In
dump_backtrace() this results in calling dump_mem() for memory
immediately above the IRQ stack range, rather than for the relevant
range on the task stack. In unwind_frame we'll reject unwinding frames
on the IRQ stack.
In either case this results in incomplete or misleading backtrace
information, but is not otherwise problematic. The initial percpu areas
(including the IRQ stacks) are allocated in the linear map, and dump_mem
uses __get_user(), so we shouldn't access anything with side-effects,
and will handle holes safely.
This patch fixes the issue by having both functions handle the NULL tsk
case before doing anything else with tsk.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Fixes: a80a0eb70c358f8c ("arm64: make irq_stack_ptr more robust")
Acked-by: James Morse <james.morse@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Yang Shi <yang.shi@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
2016-09-23 16:55:05 +00:00
|
|
|
if (tsk->ret_stack &&
|
2020-03-13 09:04:59 +00:00
|
|
|
(ptrauth_strip_insn_pac(frame->pc) == (unsigned long)return_to_handler)) {
|
2018-12-07 18:13:28 +00:00
|
|
|
struct ftrace_ret_stack *ret_stack;
|
2015-12-15 08:33:41 +00:00
|
|
|
/*
|
|
|
|
* This is a case where function graph tracer has
|
|
|
|
* modified a return address (LR) in a stack frame
|
|
|
|
* to hook a function return.
|
|
|
|
* So replace it to an original value.
|
|
|
|
*/
|
2018-12-07 18:13:28 +00:00
|
|
|
ret_stack = ftrace_graph_get_ret_stack(tsk, frame->graph++);
|
|
|
|
if (WARN_ON_ONCE(!ret_stack))
|
|
|
|
return -EINVAL;
|
|
|
|
frame->pc = ret_stack->ret;
|
2015-12-15 08:33:41 +00:00
|
|
|
}
|
|
|
|
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
|
2021-10-21 00:55:09 +00:00
|
|
|
#ifdef CONFIG_KRETPROBES
|
|
|
|
if (is_kretprobe_trampoline(frame->pc))
|
|
|
|
frame->pc = kretprobe_find_ret_addr(tsk, (void *)frame->fp, &frame->kr_cur);
|
|
|
|
#endif
|
2015-12-15 08:33:41 +00:00
|
|
|
|
2020-03-13 09:04:59 +00:00
|
|
|
frame->pc = ptrauth_strip_insn_pac(frame->pc);
|
|
|
|
|
2012-03-05 11:49:27 +00:00
|
|
|
return 0;
|
|
|
|
}
|
2019-07-25 08:16:05 +00:00
|
|
|
NOKPROBE_SYMBOL(unwind_frame);
|
2012-03-05 11:49:27 +00:00
|
|
|
|
2015-12-15 08:33:40 +00:00
|
|
|
void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
|
2020-09-14 15:34:08 +00:00
|
|
|
bool (*fn)(void *, unsigned long), void *data)
|
2012-03-05 11:49:27 +00:00
|
|
|
{
|
|
|
|
while (1) {
|
|
|
|
int ret;
|
|
|
|
|
2020-09-14 15:34:08 +00:00
|
|
|
if (!fn(data, frame->pc))
|
2012-03-05 11:49:27 +00:00
|
|
|
break;
|
2015-12-15 08:33:40 +00:00
|
|
|
ret = unwind_frame(tsk, frame);
|
2012-03-05 11:49:27 +00:00
|
|
|
if (ret < 0)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2019-07-25 08:16:05 +00:00
|
|
|
NOKPROBE_SYMBOL(walk_stackframe);
|
2012-03-05 11:49:27 +00:00
|
|
|
|
2020-09-21 12:23:41 +00:00
|
|
|
static void dump_backtrace_entry(unsigned long where, const char *loglvl)
|
|
|
|
{
|
2021-07-08 01:09:24 +00:00
|
|
|
printk("%s %pSb\n", loglvl, (void *)where);
|
2020-09-21 12:23:41 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
|
|
|
|
const char *loglvl)
|
|
|
|
{
|
|
|
|
struct stackframe frame;
|
|
|
|
int skip = 0;
|
|
|
|
|
|
|
|
pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
|
|
|
|
|
|
|
|
if (regs) {
|
|
|
|
if (user_mode(regs))
|
|
|
|
return;
|
|
|
|
skip = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!tsk)
|
|
|
|
tsk = current;
|
|
|
|
|
|
|
|
if (!try_get_task_stack(tsk))
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (tsk == current) {
|
|
|
|
start_backtrace(&frame,
|
|
|
|
(unsigned long)__builtin_frame_address(0),
|
|
|
|
(unsigned long)dump_backtrace);
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* task blocked in __switch_to
|
|
|
|
*/
|
|
|
|
start_backtrace(&frame,
|
|
|
|
thread_saved_fp(tsk),
|
|
|
|
thread_saved_pc(tsk));
|
|
|
|
}
|
|
|
|
|
|
|
|
printk("%sCall trace:\n", loglvl);
|
|
|
|
do {
|
|
|
|
/* skip until specified stack frame */
|
|
|
|
if (!skip) {
|
|
|
|
dump_backtrace_entry(frame.pc, loglvl);
|
|
|
|
} else if (frame.fp == regs->regs[29]) {
|
|
|
|
skip = 0;
|
|
|
|
/*
|
|
|
|
* Mostly, this is the case where this function is
|
|
|
|
* called in panic/abort. As exception handler's
|
|
|
|
* stack frame does not contain the corresponding pc
|
|
|
|
* at which an exception has taken place, use regs->pc
|
|
|
|
* instead.
|
|
|
|
*/
|
|
|
|
dump_backtrace_entry(regs->pc, loglvl);
|
|
|
|
}
|
|
|
|
} while (!unwind_frame(tsk, &frame));
|
|
|
|
|
|
|
|
put_task_stack(tsk);
|
|
|
|
}
|
|
|
|
|
|
|
|
void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl)
|
|
|
|
{
|
|
|
|
dump_backtrace(NULL, tsk, loglvl);
|
|
|
|
barrier();
|
|
|
|
}
|
|
|
|
|
2012-03-05 11:49:27 +00:00
|
|
|
#ifdef CONFIG_STACKTRACE
|
|
|
|
|
arm64: stacktrace: avoid tracing arch_stack_walk()
When the function_graph tracer is in use, arch_stack_walk() may unwind
the stack incorrectly, erroneously reporting itself, missing the final
entry which is being traced, and reporting all traced entries between
these off-by-one from where they should be.
When ftrace hooks a function return, the original return address is
saved to the fgraph ret_stack, and the return address in the LR (or the
function's frame record) is replaced with `return_to_handler`.
When arm64's unwinder encounter frames returning to `return_to_handler`,
it finds the associated original return address from the fgraph ret
stack, assuming the most recent `ret_to_hander` entry on the stack
corresponds to the most recent entry in the fgraph ret stack, and so on.
When arch_stack_walk() is used to dump the current task's stack, it
starts from the caller of arch_stack_walk(). However, arch_stack_walk()
can be traced, and so may push an entry on to the fgraph ret stack,
leaving the fgraph ret stack offset by one from the expected position.
This can be seen when dumping the stack via /proc/self/stack, where
enabling the graph tracer results in an unexpected
`stack_trace_save_tsk` entry at the start of the trace, and `el0_svc`
missing form the end of the trace.
This patch fixes this by marking arch_stack_walk() as notrace, as we do
for all other functions on the path to ftrace_graph_get_ret_stack().
While a few helper functions are not marked notrace, their calls/returns
are balanced, and will have no observable effect when examining the
fgraph ret stack.
It is possible for an exeption boundary to cause a similar offset if the
return address of the interrupted context was in the LR. Fixing those
cases will require some more substantial rework, and is left for
subsequent patches.
Before:
| # cat /proc/self/stack
| [<0>] proc_pid_stack+0xc4/0x140
| [<0>] proc_single_show+0x6c/0x120
| [<0>] seq_read_iter+0x240/0x4e0
| [<0>] seq_read+0xe8/0x140
| [<0>] vfs_read+0xb8/0x1e4
| [<0>] ksys_read+0x74/0x100
| [<0>] __arm64_sys_read+0x28/0x3c
| [<0>] invoke_syscall+0x50/0x120
| [<0>] el0_svc_common.constprop.0+0xc4/0xd4
| [<0>] do_el0_svc+0x30/0x9c
| [<0>] el0_svc+0x2c/0x54
| [<0>] el0t_64_sync_handler+0x1a8/0x1b0
| [<0>] el0t_64_sync+0x198/0x19c
| # echo function_graph > /sys/kernel/tracing/current_tracer
| # cat /proc/self/stack
| [<0>] stack_trace_save_tsk+0xa4/0x110
| [<0>] proc_pid_stack+0xc4/0x140
| [<0>] proc_single_show+0x6c/0x120
| [<0>] seq_read_iter+0x240/0x4e0
| [<0>] seq_read+0xe8/0x140
| [<0>] vfs_read+0xb8/0x1e4
| [<0>] ksys_read+0x74/0x100
| [<0>] __arm64_sys_read+0x28/0x3c
| [<0>] invoke_syscall+0x50/0x120
| [<0>] el0_svc_common.constprop.0+0xc4/0xd4
| [<0>] do_el0_svc+0x30/0x9c
| [<0>] el0t_64_sync_handler+0x1a8/0x1b0
| [<0>] el0t_64_sync+0x198/0x19c
After:
| # cat /proc/self/stack
| [<0>] proc_pid_stack+0xc4/0x140
| [<0>] proc_single_show+0x6c/0x120
| [<0>] seq_read_iter+0x240/0x4e0
| [<0>] seq_read+0xe8/0x140
| [<0>] vfs_read+0xb8/0x1e4
| [<0>] ksys_read+0x74/0x100
| [<0>] __arm64_sys_read+0x28/0x3c
| [<0>] invoke_syscall+0x50/0x120
| [<0>] el0_svc_common.constprop.0+0xc4/0xd4
| [<0>] do_el0_svc+0x30/0x9c
| [<0>] el0_svc+0x2c/0x54
| [<0>] el0t_64_sync_handler+0x1a8/0x1b0
| [<0>] el0t_64_sync+0x198/0x19c
| # echo function_graph > /sys/kernel/tracing/current_tracer
| # cat /proc/self/stack
| [<0>] proc_pid_stack+0xc4/0x140
| [<0>] proc_single_show+0x6c/0x120
| [<0>] seq_read_iter+0x240/0x4e0
| [<0>] seq_read+0xe8/0x140
| [<0>] vfs_read+0xb8/0x1e4
| [<0>] ksys_read+0x74/0x100
| [<0>] __arm64_sys_read+0x28/0x3c
| [<0>] invoke_syscall+0x50/0x120
| [<0>] el0_svc_common.constprop.0+0xc4/0xd4
| [<0>] do_el0_svc+0x30/0x9c
| [<0>] el0_svc+0x2c/0x54
| [<0>] el0t_64_sync_handler+0x1a8/0x1b0
| [<0>] el0t_64_sync+0x198/0x19c
Cc: <stable@vger.kernel.org>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
Cc: Mark Brown <broonie@kernel.org>
Cc: Will Deacon <will@kernel.org>
Reviwed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210802164845.45506-3-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
2021-08-02 16:48:45 +00:00
|
|
|
noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
|
arm64: stacktrace: don't trace arch_stack_walk()
We recently converted arm64 to use arch_stack_walk() in commit:
5fc57df2f6fd ("arm64: stacktrace: Convert to ARCH_STACKWALK")
The core stacktrace code expects that (when tracing the current task)
arch_stack_walk() starts a trace at its caller, and does not include
itself in the trace. However, arm64's arch_stack_walk() includes itself,
and so traces include one more entry than callers expect. The core
stacktrace code which calls arch_stack_walk() tries to skip a number of
entries to prevent itself appearing in a trace, and the additional entry
prevents skipping one of the core stacktrace functions, leaving this in
the trace unexpectedly.
We can fix this by having arm64's arch_stack_walk() begin the trace with
its caller. The first value returned by the trace will be
__builtin_return_address(0), i.e. the caller of arch_stack_walk(). The
first frame record to be unwound will be __builtin_frame_address(1),
i.e. the caller's frame record. To prevent surprises, arch_stack_walk()
is also marked noinline.
While __builtin_frame_address(1) is not safe in portable code, local GCC
developers have confirmed that it is safe on arm64. To find the caller's
frame record, the builtin can safely dereference the current function's
frame record or (in theory) could stash the original FP into another GPR
at function entry time, neither of which are problematic.
Prior to this patch, the tracing code would unexpectedly show up in
traces of the current task, e.g.
| # cat /proc/self/stack
| [<0>] stack_trace_save_tsk+0x98/0x100
| [<0>] proc_pid_stack+0xb4/0x130
| [<0>] proc_single_show+0x60/0x110
| [<0>] seq_read_iter+0x230/0x4d0
| [<0>] seq_read+0xdc/0x130
| [<0>] vfs_read+0xac/0x1e0
| [<0>] ksys_read+0x6c/0xfc
| [<0>] __arm64_sys_read+0x20/0x30
| [<0>] el0_svc_common.constprop.0+0x60/0x120
| [<0>] do_el0_svc+0x24/0x90
| [<0>] el0_svc+0x2c/0x54
| [<0>] el0_sync_handler+0x1a4/0x1b0
| [<0>] el0_sync+0x170/0x180
After this patch, the tracing code will not show up in such traces:
| # cat /proc/self/stack
| [<0>] proc_pid_stack+0xb4/0x130
| [<0>] proc_single_show+0x60/0x110
| [<0>] seq_read_iter+0x230/0x4d0
| [<0>] seq_read+0xdc/0x130
| [<0>] vfs_read+0xac/0x1e0
| [<0>] ksys_read+0x6c/0xfc
| [<0>] __arm64_sys_read+0x20/0x30
| [<0>] el0_svc_common.constprop.0+0x60/0x120
| [<0>] do_el0_svc+0x24/0x90
| [<0>] el0_svc+0x2c/0x54
| [<0>] el0_sync_handler+0x1a4/0x1b0
| [<0>] el0_sync+0x170/0x180
Erring on the side of caution, I've given this a spin with a bunch of
toolchains, verifying the output of /proc/self/stack and checking that
the assembly looked sound. For GCC (where we require version 5.1.0 or
later) I tested with the kernel.org crosstool binares for versions
5.5.0, 6.4.0, 6.5.0, 7.3.0, 7.5.0, 8.1.0, 8.3.0, 8.4.0, 9.2.0, and
10.1.0. For clang (where we require version 10.0.1 or later) I tested
with the llvm.org binary releases of 11.0.0, and 11.0.1.
Fixes: 5fc57df2f6fd ("arm64: stacktrace: Convert to ARCH_STACKWALK")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chen Jun <chenjun102@huawei.com>
Cc: Marco Elver <elver@google.com>
Cc: Mark Brown <broonie@kernel.org>
Cc: Will Deacon <will@kernel.org>
Cc: <stable@vger.kernel.org> # 5.10.x
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210319184106.5688-1-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
2021-03-19 18:41:06 +00:00
|
|
|
void *cookie, struct task_struct *task,
|
|
|
|
struct pt_regs *regs)
|
2012-03-05 11:49:27 +00:00
|
|
|
{
|
|
|
|
struct stackframe frame;
|
|
|
|
|
2020-09-14 15:34:09 +00:00
|
|
|
if (regs)
|
|
|
|
start_backtrace(&frame, regs->regs[29], regs->pc);
|
|
|
|
else if (task == current)
|
2019-07-02 13:07:28 +00:00
|
|
|
start_backtrace(&frame,
|
arm64: stacktrace: don't trace arch_stack_walk()
We recently converted arm64 to use arch_stack_walk() in commit:
5fc57df2f6fd ("arm64: stacktrace: Convert to ARCH_STACKWALK")
The core stacktrace code expects that (when tracing the current task)
arch_stack_walk() starts a trace at its caller, and does not include
itself in the trace. However, arm64's arch_stack_walk() includes itself,
and so traces include one more entry than callers expect. The core
stacktrace code which calls arch_stack_walk() tries to skip a number of
entries to prevent itself appearing in a trace, and the additional entry
prevents skipping one of the core stacktrace functions, leaving this in
the trace unexpectedly.
We can fix this by having arm64's arch_stack_walk() begin the trace with
its caller. The first value returned by the trace will be
__builtin_return_address(0), i.e. the caller of arch_stack_walk(). The
first frame record to be unwound will be __builtin_frame_address(1),
i.e. the caller's frame record. To prevent surprises, arch_stack_walk()
is also marked noinline.
While __builtin_frame_address(1) is not safe in portable code, local GCC
developers have confirmed that it is safe on arm64. To find the caller's
frame record, the builtin can safely dereference the current function's
frame record or (in theory) could stash the original FP into another GPR
at function entry time, neither of which are problematic.
Prior to this patch, the tracing code would unexpectedly show up in
traces of the current task, e.g.
| # cat /proc/self/stack
| [<0>] stack_trace_save_tsk+0x98/0x100
| [<0>] proc_pid_stack+0xb4/0x130
| [<0>] proc_single_show+0x60/0x110
| [<0>] seq_read_iter+0x230/0x4d0
| [<0>] seq_read+0xdc/0x130
| [<0>] vfs_read+0xac/0x1e0
| [<0>] ksys_read+0x6c/0xfc
| [<0>] __arm64_sys_read+0x20/0x30
| [<0>] el0_svc_common.constprop.0+0x60/0x120
| [<0>] do_el0_svc+0x24/0x90
| [<0>] el0_svc+0x2c/0x54
| [<0>] el0_sync_handler+0x1a4/0x1b0
| [<0>] el0_sync+0x170/0x180
After this patch, the tracing code will not show up in such traces:
| # cat /proc/self/stack
| [<0>] proc_pid_stack+0xb4/0x130
| [<0>] proc_single_show+0x60/0x110
| [<0>] seq_read_iter+0x230/0x4d0
| [<0>] seq_read+0xdc/0x130
| [<0>] vfs_read+0xac/0x1e0
| [<0>] ksys_read+0x6c/0xfc
| [<0>] __arm64_sys_read+0x20/0x30
| [<0>] el0_svc_common.constprop.0+0x60/0x120
| [<0>] do_el0_svc+0x24/0x90
| [<0>] el0_svc+0x2c/0x54
| [<0>] el0_sync_handler+0x1a4/0x1b0
| [<0>] el0_sync+0x170/0x180
Erring on the side of caution, I've given this a spin with a bunch of
toolchains, verifying the output of /proc/self/stack and checking that
the assembly looked sound. For GCC (where we require version 5.1.0 or
later) I tested with the kernel.org crosstool binares for versions
5.5.0, 6.4.0, 6.5.0, 7.3.0, 7.5.0, 8.1.0, 8.3.0, 8.4.0, 9.2.0, and
10.1.0. For clang (where we require version 10.0.1 or later) I tested
with the llvm.org binary releases of 11.0.0, and 11.0.1.
Fixes: 5fc57df2f6fd ("arm64: stacktrace: Convert to ARCH_STACKWALK")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chen Jun <chenjun102@huawei.com>
Cc: Marco Elver <elver@google.com>
Cc: Mark Brown <broonie@kernel.org>
Cc: Will Deacon <will@kernel.org>
Cc: <stable@vger.kernel.org> # 5.10.x
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210319184106.5688-1-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
2021-03-19 18:41:06 +00:00
|
|
|
(unsigned long)__builtin_frame_address(1),
|
|
|
|
(unsigned long)__builtin_return_address(0));
|
2020-09-14 15:34:09 +00:00
|
|
|
else
|
|
|
|
start_backtrace(&frame, thread_saved_fp(task),
|
|
|
|
thread_saved_pc(task));
|
2016-11-03 20:23:08 +00:00
|
|
|
|
2020-09-14 15:34:09 +00:00
|
|
|
walk_stackframe(task, &frame, consume_entry, cookie);
|
2012-03-05 11:49:27 +00:00
|
|
|
}
|
2017-09-13 23:28:32 +00:00
|
|
|
|
2012-03-05 11:49:27 +00:00
|
|
|
#endif
|