Merge branch kvm-arm64/nvhe-stacktrace into kvmarm-master/next

* kvm-arm64/nvhe-stacktrace: (27 commits)
  : .
  : Add an overflow stack to the nVHE EL2 code, allowing
  : the implementation of an unwinder, courtesy of
  : Kalesh Singh. From the cover letter (slightly edited):
  :
  : "nVHE has two modes of operation: protected (pKVM) and unprotected
  : (conventional nVHE). Depending on the mode, a slightly different approach
  : is used to dump the hypervisor stacktrace but the core unwinding logic
  : remains the same.
  :
  : * Protected nVHE (pKVM) stacktraces:
  :
  : In protected nVHE mode, the host cannot directly access hypervisor memory.
  :
  : The hypervisor stack unwinding happens in EL2 and is made accessible to
  : the host via a shared buffer. Symbolizing and printing the stacktrace
  : addresses is delegated to the host and happens in EL1.
  :
  : * Non-protected (Conventional) nVHE stacktraces:
  :
  : In non-protected mode, the host is able to directly access the hypervisor
  : stack pages.
  :
  : The hypervisor stack unwinding and dumping of the stacktrace is performed
  : by the host in EL1, as this avoids the memory overhead of setting up
  : shared buffers between the host and hypervisor."
  :
  : Additional patches from Oliver Upton and Marc Zyngier, tidying up
  : the initial series.
  : .
  arm64: Update 'unwinder howto'
  KVM: arm64: Don't open code ARRAY_SIZE()
  KVM: arm64: Move nVHE-only helpers into kvm/stacktrace.c
  KVM: arm64: Make unwind()/on_accessible_stack() per-unwinder functions
  KVM: arm64: Move nVHE stacktrace unwinding into its own compilation unit
  KVM: arm64: Move PROTECTED_NVHE_STACKTRACE around
  KVM: arm64: Introduce pkvm_dump_backtrace()
  KVM: arm64: Implement protected nVHE hyp stack unwinder
  KVM: arm64: Save protected-nVHE (pKVM) hyp stacktrace
  KVM: arm64: Stub implementation of pKVM HYP stack unwinder
  KVM: arm64: Allocate shared pKVM hyp stacktrace buffers
  KVM: arm64: Add PROTECTED_NVHE_STACKTRACE Kconfig
  KVM: arm64: Introduce hyp_dump_backtrace()
  KVM: arm64: Implement non-protected nVHE hyp stack unwinder
  KVM: arm64: Prepare non-protected nVHE hypervisor stacktrace
  KVM: arm64: Stub implementation of non-protected nVHE HYP stack unwinder
  KVM: arm64: On stack overflow switch to hyp overflow_stack
  arm64: stacktrace: Add description of stacktrace/common.h
  arm64: stacktrace: Factor out common unwind()
  arm64: stacktrace: Handle frame pointer from different address spaces
  ...

Signed-off-by: Marc Zyngier <maz@kernel.org>
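Both modes share the same core algorithm: a frame-pointer walk over AArch64 frame records, driven by a callback that validates each candidate frame and an optional callback that translates the frame pointer into an address the unwinder can actually dereference (that translation is what lets the host walk hypervisor-VA frame pointers in non-protected mode). A minimal, self-contained userspace C sketch of that shape follows; it is illustrative only - the struct, callback and function names are made up, and it models a single stack with none of the kernel's stack-nesting or pointer-authentication handling.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative stand-in for an AArch64 frame record: [x29, x30]. */
    struct frame_record {
            uintptr_t fp;   /* previous frame record (saved x29) */
            uintptr_t pc;   /* return address (saved x30/lr)     */
    };

    /* Callback types mirroring the two hooks taken by the common unwinder. */
    typedef bool (*accessible_fn)(uintptr_t fp);  /* frame on a valid stack?        */
    typedef bool (*translate_fn)(uintptr_t *fp);  /* optionally remap fp before use */

    static void unwind(uintptr_t fp, uintptr_t pc,
                       accessible_fn accessible, translate_fn translate)
    {
            while (pc) {
                    uintptr_t deref = fp;
                    const struct frame_record *rec;

                    printf("  [<%016lx>]\n", (unsigned long)pc);

                    if ((fp & 0x7) || !accessible(fp))
                            break;  /* misaligned or off-stack frame */
                    if (translate && !translate(&deref))
                            break;  /* cannot map fp into this address space */

                    rec = (const struct frame_record *)deref;
                    fp = rec->fp;   /* step to the previous frame record */
                    pc = rec->pc;
            }
    }

    static bool always_accessible(uintptr_t fp) { (void)fp; return true; }

    int main(void)
    {
            /* Fake two-deep call chain: inner frame links to a terminating outer frame. */
            struct frame_record outer = { 0, 0 };
            struct frame_record inner = { (uintptr_t)&outer, 0x1234 };

            unwind((uintptr_t)&inner, 0xabcd, always_accessible, NULL);
            return 0;
    }

In the kernel code below, unwind_next_common() plays the role of the loop body, on_accessible_stack() the validation callback, and kvm_nvhe_stack_kern_va() the translation callback.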
@@ -176,6 +176,22 @@ struct kvm_nvhe_init_params {
 	unsigned long vtcr;
 };
 
+/*
+ * Used by the host in EL1 to dump the nVHE hypervisor backtrace on
+ * hyp_panic() in non-protected mode.
+ *
+ * @stack_base:          hyp VA of the hyp_stack base.
+ * @overflow_stack_base: hyp VA of the hyp_overflow_stack base.
+ * @fp:                  hyp FP where the backtrace begins.
+ * @pc:                  hyp PC where the backtrace begins.
+ */
+struct kvm_nvhe_stacktrace_info {
+	unsigned long stack_base;
+	unsigned long overflow_stack_base;
+	unsigned long fp;
+	unsigned long pc;
+};
+
 /* Translate a kernel address @ptr into its equivalent linear mapping */
 #define kvm_ksym_ref(ptr)					\
 	({							\
@@ -113,6 +113,14 @@
 #define OVERFLOW_STACK_SIZE	SZ_4K
 
+/*
+ * With the minimum frame size of [x29, x30], exactly half the combined
+ * sizes of the hyp and overflow stacks is the maximum size needed to
+ * save the unwinded stacktrace; plus an additional entry to delimit the
+ * end.
+ */
+#define NVHE_STACKTRACE_SIZE ((OVERFLOW_STACK_SIZE + PAGE_SIZE) / 2 + sizeof(long))
+
 /*
  * Alignment of kernel segments (e.g. .text, .data).
  *
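(Assuming the default 4KB page size and 8-byte longs, NVHE_STACKTRACE_SIZE works out to (4096 + 4096) / 2 + 8 = 4104 bytes per CPU, i.e. 513 long-sized slots: up to 512 saved return addresses plus the terminating delimiter entry.)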
@@ -8,52 +8,20 @@
 #include <linux/percpu.h>
 #include <linux/sched.h>
 #include <linux/sched/task_stack.h>
-#include <linux/types.h>
 #include <linux/llist.h>
 
 #include <asm/memory.h>
+#include <asm/pointer_auth.h>
 #include <asm/ptrace.h>
 #include <asm/sdei.h>
 
-enum stack_type {
-	STACK_TYPE_UNKNOWN,
-	STACK_TYPE_TASK,
-	STACK_TYPE_IRQ,
-	STACK_TYPE_OVERFLOW,
-	STACK_TYPE_SDEI_NORMAL,
-	STACK_TYPE_SDEI_CRITICAL,
-	__NR_STACK_TYPES
-};
-
-struct stack_info {
-	unsigned long low;
-	unsigned long high;
-	enum stack_type type;
-};
+#include <asm/stacktrace/common.h>
 
 extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
 			   const char *loglvl);
 
 DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);
 
-static inline bool on_stack(unsigned long sp, unsigned long size,
-			    unsigned long low, unsigned long high,
-			    enum stack_type type, struct stack_info *info)
-{
-	if (!low)
-		return false;
-
-	if (sp < low || sp + size < sp || sp + size > high)
-		return false;
-
-	if (info) {
-		info->low = low;
-		info->high = high;
-		info->type = type;
-	}
-	return true;
-}
-
 static inline bool on_irq_stack(unsigned long sp, unsigned long size,
 				struct stack_info *info)
 {
@@ -89,30 +57,4 @@ static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
 			struct stack_info *info) { return false; }
 #endif
 
-
-/*
- * We can only safely access per-cpu stacks from current in a non-preemptible
- * context.
- */
-static inline bool on_accessible_stack(const struct task_struct *tsk,
-				       unsigned long sp, unsigned long size,
-				       struct stack_info *info)
-{
-	if (info)
-		info->type = STACK_TYPE_UNKNOWN;
-
-	if (on_task_stack(tsk, sp, size, info))
-		return true;
-	if (tsk != current || preemptible())
-		return false;
-	if (on_irq_stack(sp, size, info))
-		return true;
-	if (on_overflow_stack(sp, size, info))
-		return true;
-	if (on_sdei_stack(sp, size, info))
-		return true;
-
-	return false;
-}
-
 #endif	/* __ASM_STACKTRACE_H */
arch/arm64/include/asm/stacktrace/common.h (new file, 199 lines)
@@ -0,0 +1,199 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Common arm64 stack unwinder code.
 *
 * To implement a new arm64 stack unwinder:
 *     1) Include this header
 *
 *     2) Call into unwind_next_common() from your top level unwind
 *        function, passing it the validation and translation callbacks
 *        (though the later can be NULL if no translation is required).
 *
 * See: arch/arm64/kernel/stacktrace.c for the reference implementation.
 *
 * Copyright (C) 2012 ARM Ltd.
 */
#ifndef __ASM_STACKTRACE_COMMON_H
#define __ASM_STACKTRACE_COMMON_H

#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/kprobes.h>
#include <linux/types.h>

enum stack_type {
	STACK_TYPE_UNKNOWN,
	STACK_TYPE_TASK,
	STACK_TYPE_IRQ,
	STACK_TYPE_OVERFLOW,
	STACK_TYPE_SDEI_NORMAL,
	STACK_TYPE_SDEI_CRITICAL,
	STACK_TYPE_HYP,
	__NR_STACK_TYPES
};

struct stack_info {
	unsigned long low;
	unsigned long high;
	enum stack_type type;
};

/*
 * A snapshot of a frame record or fp/lr register values, along with some
 * accounting information necessary for robust unwinding.
 *
 * @fp:          The fp value in the frame record (or the real fp)
 * @pc:          The lr value in the frame record (or the real lr)
 *
 * @stacks_done: Stacks which have been entirely unwound, for which it is no
 *               longer valid to unwind to.
 *
 * @prev_fp:     The fp that pointed to this frame record, or a synthetic value
 *               of 0. This is used to ensure that within a stack, each
 *               subsequent frame record is at an increasing address.
 * @prev_type:   The type of stack this frame record was on, or a synthetic
 *               value of STACK_TYPE_UNKNOWN. This is used to detect a
 *               transition from one stack to another.
 *
 * @kr_cur:      When KRETPROBES is selected, holds the kretprobe instance
 *               associated with the most recently encountered replacement lr
 *               value.
 *
 * @task:        The task being unwound.
 */
struct unwind_state {
	unsigned long fp;
	unsigned long pc;
	DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
	unsigned long prev_fp;
	enum stack_type prev_type;
#ifdef CONFIG_KRETPROBES
	struct llist_node *kr_cur;
#endif
	struct task_struct *task;
};

static inline bool on_stack(unsigned long sp, unsigned long size,
			    unsigned long low, unsigned long high,
			    enum stack_type type, struct stack_info *info)
{
	if (!low)
		return false;

	if (sp < low || sp + size < sp || sp + size > high)
		return false;

	if (info) {
		info->low = low;
		info->high = high;
		info->type = type;
	}
	return true;
}

static inline void unwind_init_common(struct unwind_state *state,
				      struct task_struct *task)
{
	state->task = task;
#ifdef CONFIG_KRETPROBES
	state->kr_cur = NULL;
#endif

	/*
	 * Prime the first unwind.
	 *
	 * In unwind_next() we'll check that the FP points to a valid stack,
	 * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
	 * treated as a transition to whichever stack that happens to be. The
	 * prev_fp value won't be used, but we set it to 0 such that it is
	 * definitely not an accessible stack address.
	 */
	bitmap_zero(state->stacks_done, __NR_STACK_TYPES);
	state->prev_fp = 0;
	state->prev_type = STACK_TYPE_UNKNOWN;
}

/*
 * stack_trace_translate_fp_fn() - Translates a non-kernel frame pointer to
 * a kernel address.
 *
 * @fp:   the frame pointer to be updated to its kernel address.
 * @type: the stack type associated with frame pointer @fp
 *
 * Returns true and success and @fp is updated to the corresponding
 * kernel virtual address; otherwise returns false.
 */
typedef bool (*stack_trace_translate_fp_fn)(unsigned long *fp,
					    enum stack_type type);

/*
 * on_accessible_stack_fn() - Check whether a stack range is on any
 * of the possible stacks.
 *
 * @tsk:  task whose stack is being unwound
 * @sp:   stack address being checked
 * @size: size of the stack range being checked
 * @info: stack unwinding context
 */
typedef bool (*on_accessible_stack_fn)(const struct task_struct *tsk,
				       unsigned long sp, unsigned long size,
				       struct stack_info *info);

static inline int unwind_next_common(struct unwind_state *state,
				     struct stack_info *info,
				     on_accessible_stack_fn accessible,
				     stack_trace_translate_fp_fn translate_fp)
{
	unsigned long fp = state->fp, kern_fp = fp;
	struct task_struct *tsk = state->task;

	if (fp & 0x7)
		return -EINVAL;

	if (!accessible(tsk, fp, 16, info))
		return -EINVAL;

	if (test_bit(info->type, state->stacks_done))
		return -EINVAL;

	/*
	 * If fp is not from the current address space perform the necessary
	 * translation before dereferencing it to get the next fp.
	 */
	if (translate_fp && !translate_fp(&kern_fp, info->type))
		return -EINVAL;

	/*
	 * As stacks grow downward, any valid record on the same stack must be
	 * at a strictly higher address than the prior record.
	 *
	 * Stacks can nest in several valid orders, e.g.
	 *
	 * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
	 * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
	 * HYP -> OVERFLOW
	 *
	 * ... but the nesting itself is strict. Once we transition from one
	 * stack to another, it's never valid to unwind back to that first
	 * stack.
	 */
	if (info->type == state->prev_type) {
		if (fp <= state->prev_fp)
			return -EINVAL;
	} else {
		__set_bit(state->prev_type, state->stacks_done);
	}

	/*
	 * Record this frame record's values and location. The prev_fp and
	 * prev_type are only meaningful to the next unwind_next() invocation.
	 */
	state->fp = READ_ONCE(*(unsigned long *)(kern_fp));
	state->pc = READ_ONCE(*(unsigned long *)(kern_fp + 8));
	state->prev_fp = fp;
	state->prev_type = info->type;

	return 0;
}

#endif	/* __ASM_STACKTRACE_COMMON_H */
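(As a concrete example of the stacks_done accounting above: once an unwind has stepped from the IRQ stack onto the overflow stack, STACK_TYPE_IRQ is marked done, so a later frame pointer that points back into the IRQ stack makes unwind_next_common() return -EINVAL instead of looping between the two stacks.)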
arch/arm64/include/asm/stacktrace/nvhe.h (new file, 55 lines)
@@ -0,0 +1,55 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * KVM nVHE hypervisor stack tracing support.
 *
 * The unwinder implementation depends on the nVHE mode:
 *
 *   1) Non-protected nVHE mode - the host can directly access the
 *      HYP stack pages and unwind the HYP stack in EL1. This saves having
 *      to allocate shared buffers for the host to read the unwinded
 *      stacktrace.
 *
 *   2) pKVM (protected nVHE) mode - the host cannot directly access
 *      the HYP memory. The stack is unwinded in EL2 and dumped to a shared
 *      buffer where the host can read and print the stacktrace.
 *
 * Copyright (C) 2022 Google LLC
 */
#ifndef __ASM_STACKTRACE_NVHE_H
#define __ASM_STACKTRACE_NVHE_H

#include <asm/stacktrace/common.h>

/*
 * kvm_nvhe_unwind_init - Start an unwind from the given nVHE HYP fp and pc
 *
 * @state : unwind_state to initialize
 * @fp    : frame pointer at which to start the unwinding.
 * @pc    : program counter at which to start the unwinding.
 */
static inline void kvm_nvhe_unwind_init(struct unwind_state *state,
					unsigned long fp,
					unsigned long pc)
{
	unwind_init_common(state, NULL);

	state->fp = fp;
	state->pc = pc;
}

#ifndef __KVM_NVHE_HYPERVISOR__
/*
 * Conventional (non-protected) nVHE HYP stack unwinder
 *
 * In non-protected mode, the unwinding is done from kernel proper context
 * (by the host in EL1).
 */

DECLARE_KVM_NVHE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_stacktrace_info, kvm_stacktrace_info);
DECLARE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);

void kvm_nvhe_dump_backtrace(unsigned long hyp_offset);

#endif	/* __KVM_NVHE_HYPERVISOR__ */
#endif	/* __ASM_STACKTRACE_NVHE_H */
@@ -14,6 +14,11 @@ CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_syscall.o	 = -fstack-protector -fstack-protector-strong
 CFLAGS_syscall.o	+= -fno-stack-protector
 
+# When KASAN is enabled, a stack trace is recorded for every alloc/free, which
+# can significantly impact performance. Avoid instrumenting the stack trace
+# collection code to minimize this impact.
+KASAN_SANITIZE_stacktrace.o := n
+
 # It's not safe to invoke KCOV when portions of the kernel environment aren't
 # available or are out-of-sync with HW state. Since `noinstr` doesn't always
 # inhibit KCOV instrumentation, disable it for the entire compilation unit.
|
|||||||
#include <linux/kernel.h>
|
#include <linux/kernel.h>
|
||||||
#include <linux/export.h>
|
#include <linux/export.h>
|
||||||
#include <linux/ftrace.h>
|
#include <linux/ftrace.h>
|
||||||
#include <linux/kprobes.h>
|
|
||||||
#include <linux/sched.h>
|
#include <linux/sched.h>
|
||||||
#include <linux/sched/debug.h>
|
#include <linux/sched/debug.h>
|
||||||
#include <linux/sched/task_stack.h>
|
#include <linux/sched/task_stack.h>
|
||||||
#include <linux/stacktrace.h>
|
#include <linux/stacktrace.h>
|
||||||
|
|
||||||
#include <asm/irq.h>
|
#include <asm/irq.h>
|
||||||
#include <asm/pointer_auth.h>
|
|
||||||
#include <asm/stack_pointer.h>
|
#include <asm/stack_pointer.h>
|
||||||
#include <asm/stacktrace.h>
|
#include <asm/stacktrace.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* A snapshot of a frame record or fp/lr register values, along with some
|
* Start an unwind from a pt_regs.
|
||||||
* accounting information necessary for robust unwinding.
|
|
||||||
*
|
*
|
||||||
* @fp: The fp value in the frame record (or the real fp)
|
* The unwind will begin at the PC within the regs.
|
||||||
* @pc: The lr value in the frame record (or the real lr)
|
|
||||||
*
|
*
|
||||||
* @stacks_done: Stacks which have been entirely unwound, for which it is no
|
* The regs must be on a stack currently owned by the calling task.
|
||||||
* longer valid to unwind to.
|
|
||||||
*
|
|
||||||
* @prev_fp: The fp that pointed to this frame record, or a synthetic value
|
|
||||||
* of 0. This is used to ensure that within a stack, each
|
|
||||||
* subsequent frame record is at an increasing address.
|
|
||||||
* @prev_type: The type of stack this frame record was on, or a synthetic
|
|
||||||
* value of STACK_TYPE_UNKNOWN. This is used to detect a
|
|
||||||
* transition from one stack to another.
|
|
||||||
*
|
|
||||||
* @kr_cur: When KRETPROBES is selected, holds the kretprobe instance
|
|
||||||
* associated with the most recently encountered replacement lr
|
|
||||||
* value.
|
|
||||||
*/
|
*/
|
||||||
struct unwind_state {
|
static inline void unwind_init_from_regs(struct unwind_state *state,
|
||||||
unsigned long fp;
|
struct pt_regs *regs)
|
||||||
unsigned long pc;
|
|
||||||
DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
|
|
||||||
unsigned long prev_fp;
|
|
||||||
enum stack_type prev_type;
|
|
||||||
#ifdef CONFIG_KRETPROBES
|
|
||||||
struct llist_node *kr_cur;
|
|
||||||
#endif
|
|
||||||
};
|
|
||||||
|
|
||||||
static notrace void unwind_init(struct unwind_state *state, unsigned long fp,
|
|
||||||
unsigned long pc)
|
|
||||||
{
|
{
|
||||||
state->fp = fp;
|
unwind_init_common(state, current);
|
||||||
state->pc = pc;
|
|
||||||
#ifdef CONFIG_KRETPROBES
|
|
||||||
state->kr_cur = NULL;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
state->fp = regs->regs[29];
|
||||||
* Prime the first unwind.
|
state->pc = regs->pc;
|
||||||
*
|
}
|
||||||
* In unwind_next() we'll check that the FP points to a valid stack,
|
|
||||||
* which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
|
/*
|
||||||
* treated as a transition to whichever stack that happens to be. The
|
* Start an unwind from a caller.
|
||||||
* prev_fp value won't be used, but we set it to 0 such that it is
|
*
|
||||||
* definitely not an accessible stack address.
|
* The unwind will begin at the caller of whichever function this is inlined
|
||||||
*/
|
* into.
|
||||||
bitmap_zero(state->stacks_done, __NR_STACK_TYPES);
|
*
|
||||||
state->prev_fp = 0;
|
* The function which invokes this must be noinline.
|
||||||
state->prev_type = STACK_TYPE_UNKNOWN;
|
*/
|
||||||
|
static __always_inline void unwind_init_from_caller(struct unwind_state *state)
|
||||||
|
{
|
||||||
|
unwind_init_common(state, current);
|
||||||
|
|
||||||
|
state->fp = (unsigned long)__builtin_frame_address(1);
|
||||||
|
state->pc = (unsigned long)__builtin_return_address(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Start an unwind from a blocked task.
|
||||||
|
*
|
||||||
|
* The unwind will begin at the blocked tasks saved PC (i.e. the caller of
|
||||||
|
* cpu_switch_to()).
|
||||||
|
*
|
||||||
|
* The caller should ensure the task is blocked in cpu_switch_to() for the
|
||||||
|
* duration of the unwind, or the unwind will be bogus. It is never valid to
|
||||||
|
* call this for the current task.
|
||||||
|
*/
|
||||||
|
static inline void unwind_init_from_task(struct unwind_state *state,
|
||||||
|
struct task_struct *task)
|
||||||
|
{
|
||||||
|
unwind_init_common(state, task);
|
||||||
|
|
||||||
|
state->fp = thread_saved_fp(task);
|
||||||
|
state->pc = thread_saved_pc(task);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We can only safely access per-cpu stacks from current in a non-preemptible
|
||||||
|
* context.
|
||||||
|
*/
|
||||||
|
static bool on_accessible_stack(const struct task_struct *tsk,
|
||||||
|
unsigned long sp, unsigned long size,
|
||||||
|
struct stack_info *info)
|
||||||
|
{
|
||||||
|
if (info)
|
||||||
|
info->type = STACK_TYPE_UNKNOWN;
|
||||||
|
|
||||||
|
if (on_task_stack(tsk, sp, size, info))
|
||||||
|
return true;
|
||||||
|
if (tsk != current || preemptible())
|
||||||
|
return false;
|
||||||
|
if (on_irq_stack(sp, size, info))
|
||||||
|
return true;
|
||||||
|
if (on_overflow_stack(sp, size, info))
|
||||||
|
return true;
|
||||||
|
if (on_sdei_stack(sp, size, info))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
NOKPROBE_SYMBOL(unwind_init);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Unwind from one frame record (A) to the next frame record (B).
|
* Unwind from one frame record (A) to the next frame record (B).
|
||||||
@ -81,53 +99,20 @@ NOKPROBE_SYMBOL(unwind_init);
|
|||||||
* records (e.g. a cycle), determined based on the location and fp value of A
|
* records (e.g. a cycle), determined based on the location and fp value of A
|
||||||
* and the location (but not the fp value) of B.
|
* and the location (but not the fp value) of B.
|
||||||
*/
|
*/
|
||||||
static int notrace unwind_next(struct task_struct *tsk,
|
static int notrace unwind_next(struct unwind_state *state)
|
||||||
struct unwind_state *state)
|
|
||||||
{
|
{
|
||||||
|
struct task_struct *tsk = state->task;
|
||||||
unsigned long fp = state->fp;
|
unsigned long fp = state->fp;
|
||||||
struct stack_info info;
|
struct stack_info info;
|
||||||
|
int err;
|
||||||
|
|
||||||
/* Final frame; nothing to unwind */
|
/* Final frame; nothing to unwind */
|
||||||
if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
|
if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
|
||||||
return -ENOENT;
|
return -ENOENT;
|
||||||
|
|
||||||
if (fp & 0x7)
|
err = unwind_next_common(state, &info, on_accessible_stack, NULL);
|
||||||
return -EINVAL;
|
if (err)
|
||||||
|
return err;
|
||||||
if (!on_accessible_stack(tsk, fp, 16, &info))
|
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
if (test_bit(info.type, state->stacks_done))
|
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* As stacks grow downward, any valid record on the same stack must be
|
|
||||||
* at a strictly higher address than the prior record.
|
|
||||||
*
|
|
||||||
* Stacks can nest in several valid orders, e.g.
|
|
||||||
*
|
|
||||||
* TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
|
|
||||||
* TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
|
|
||||||
*
|
|
||||||
* ... but the nesting itself is strict. Once we transition from one
|
|
||||||
* stack to another, it's never valid to unwind back to that first
|
|
||||||
* stack.
|
|
||||||
*/
|
|
||||||
if (info.type == state->prev_type) {
|
|
||||||
if (fp <= state->prev_fp)
|
|
||||||
return -EINVAL;
|
|
||||||
} else {
|
|
||||||
set_bit(state->prev_type, state->stacks_done);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Record this frame record's values and location. The prev_fp and
|
|
||||||
* prev_type are only meaningful to the next unwind_next() invocation.
|
|
||||||
*/
|
|
||||||
state->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
|
|
||||||
state->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
|
|
||||||
state->prev_fp = fp;
|
|
||||||
state->prev_type = info.type;
|
|
||||||
|
|
||||||
state->pc = ptrauth_strip_insn_pac(state->pc);
|
state->pc = ptrauth_strip_insn_pac(state->pc);
|
||||||
|
|
||||||
@ -157,8 +142,7 @@ static int notrace unwind_next(struct task_struct *tsk,
|
|||||||
}
|
}
|
||||||
NOKPROBE_SYMBOL(unwind_next);
|
NOKPROBE_SYMBOL(unwind_next);
|
||||||
|
|
||||||
static void notrace unwind(struct task_struct *tsk,
|
static void notrace unwind(struct unwind_state *state,
|
||||||
struct unwind_state *state,
|
|
||||||
stack_trace_consume_fn consume_entry, void *cookie)
|
stack_trace_consume_fn consume_entry, void *cookie)
|
||||||
{
|
{
|
||||||
while (1) {
|
while (1) {
|
||||||
@ -166,7 +150,7 @@ static void notrace unwind(struct task_struct *tsk,
|
|||||||
|
|
||||||
if (!consume_entry(cookie, state->pc))
|
if (!consume_entry(cookie, state->pc))
|
||||||
break;
|
break;
|
||||||
ret = unwind_next(tsk, state);
|
ret = unwind_next(state);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -212,15 +196,15 @@ noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
|
|||||||
{
|
{
|
||||||
struct unwind_state state;
|
struct unwind_state state;
|
||||||
|
|
||||||
if (regs)
|
if (regs) {
|
||||||
unwind_init(&state, regs->regs[29], regs->pc);
|
if (task != current)
|
||||||
else if (task == current)
|
return;
|
||||||
unwind_init(&state,
|
unwind_init_from_regs(&state, regs);
|
||||||
(unsigned long)__builtin_frame_address(1),
|
} else if (task == current) {
|
||||||
(unsigned long)__builtin_return_address(0));
|
unwind_init_from_caller(&state);
|
||||||
else
|
} else {
|
||||||
unwind_init(&state, thread_saved_fp(task),
|
unwind_init_from_task(&state, task);
|
||||||
thread_saved_pc(task));
|
}
|
||||||
|
|
||||||
unwind(task, &state, consume_entry, cookie);
|
unwind(&state, consume_entry, cookie);
|
||||||
}
|
}
|
||||||
|
@@ -56,4 +56,17 @@ config NVHE_EL2_DEBUG
 
 	  If unsure, say N.
 
+config PROTECTED_NVHE_STACKTRACE
+	bool "Protected KVM hypervisor stacktraces"
+	depends on NVHE_EL2_DEBUG
+	default n
+	help
+	  Say Y here to enable pKVM hypervisor stacktraces on hyp_panic()
+
+	  If using protected nVHE mode, but cannot afford the associated
+	  memory cost (less than 0.75 page per CPU) of pKVM stacktraces,
+	  say N.
+
+	  If unsure, or not using protected nVHE (pKVM), say N.
+
 endif # VIRTUALIZATION
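(Note that PROTECTED_NVHE_STACKTRACE only becomes selectable once NVHE_EL2_DEBUG is enabled; with it disabled, the pKVM dump path falls back to the stub that prints an error message - see the !CONFIG_PROTECTED_NVHE_STACKTRACE branch of pkvm_dump_backtrace() later in this diff.)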
@@ -12,7 +12,7 @@ obj-$(CONFIG_KVM) += hyp/
 
 kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
 	 inject_fault.o va_layout.o handle_exit.o \
-	 guest.o debug.o reset.o sys_regs.o \
+	 guest.o debug.o reset.o sys_regs.o stacktrace.o \
 	 vgic-sys-reg-v3.o fpsimd.o pkvm.o \
 	 arch_timer.o trng.o vmid.o \
 	 vgic/vgic.o vgic/vgic-init.o \
@@ -49,7 +49,7 @@ DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
 
 DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
 
-static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
+DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
 unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
 DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
 
@@ -17,6 +17,7 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_mmu.h>
 #include <asm/debug-monitors.h>
+#include <asm/stacktrace/nvhe.h>
 #include <asm/traps.h>
 
 #include <kvm/arm_hypercalls.h>
@@ -353,6 +354,9 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
 			(void *)(panic_addr + kaslr_offset()));
 	}
 
+	/* Dump the nVHE hypervisor backtrace */
+	kvm_nvhe_dump_backtrace(hyp_offset);
+
 	/*
 	 * Hyp has panicked and we're going to handle that by panicking the
 	 * kernel. The kernel offset will be revealed in the panic so we're
@@ -14,7 +14,7 @@ lib-objs := $(addprefix ../../../lib/, $(lib-objs))
 
 hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
 	 hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \
-	 cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o
+	 cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o
 hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
 	 ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
 hyp-obj-$(CONFIG_DEBUG_LIST) += list_debug.o
@@ -177,13 +177,8 @@ SYM_FUNC_END(__host_hvc)
 	b	hyp_panic
 
 .L__hyp_sp_overflow\@:
-	/*
-	 * Reset SP to the top of the stack, to allow handling the hyp_panic.
-	 * This corrupts the stack but is ok, since we won't be attempting
-	 * any unwinding here.
-	 */
-	ldr_this_cpu x0, kvm_init_params + NVHE_INIT_STACK_HYP_VA, x1
-	mov	sp, x0
+	/* Switch to the overflow stack */
+	adr_this_cpu sp, overflow_stack + OVERFLOW_STACK_SIZE, x0
 
 	b	hyp_panic_bad_stack
 	ASM_BUG()
arch/arm64/kvm/hyp/nvhe/stacktrace.c (new file, 160 lines)
@@ -0,0 +1,160 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * KVM nVHE hypervisor stack tracing support.
 *
 * Copyright (C) 2022 Google LLC
 */
#include <asm/kvm_asm.h>
#include <asm/kvm_hyp.h>
#include <asm/memory.h>
#include <asm/percpu.h>

DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)
	__aligned(16);

DEFINE_PER_CPU(struct kvm_nvhe_stacktrace_info, kvm_stacktrace_info);

/*
 * hyp_prepare_backtrace - Prepare non-protected nVHE backtrace.
 *
 * @fp : frame pointer at which to start the unwinding.
 * @pc : program counter at which to start the unwinding.
 *
 * Save the information needed by the host to unwind the non-protected
 * nVHE hypervisor stack in EL1.
 */
static void hyp_prepare_backtrace(unsigned long fp, unsigned long pc)
{
	struct kvm_nvhe_stacktrace_info *stacktrace_info = this_cpu_ptr(&kvm_stacktrace_info);
	struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);

	stacktrace_info->stack_base = (unsigned long)(params->stack_hyp_va - PAGE_SIZE);
	stacktrace_info->overflow_stack_base = (unsigned long)this_cpu_ptr(overflow_stack);
	stacktrace_info->fp = fp;
	stacktrace_info->pc = pc;
}

#ifdef CONFIG_PROTECTED_NVHE_STACKTRACE
#include <asm/stacktrace/nvhe.h>

DEFINE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)], pkvm_stacktrace);

static bool on_overflow_stack(unsigned long sp, unsigned long size,
			      struct stack_info *info)
{
	unsigned long low = (unsigned long)this_cpu_ptr(overflow_stack);
	unsigned long high = low + OVERFLOW_STACK_SIZE;

	return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info);
}

static bool on_hyp_stack(unsigned long sp, unsigned long size,
			 struct stack_info *info)
{
	struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
	unsigned long high = params->stack_hyp_va;
	unsigned long low = high - PAGE_SIZE;

	return on_stack(sp, size, low, high, STACK_TYPE_HYP, info);
}

static bool on_accessible_stack(const struct task_struct *tsk,
				unsigned long sp, unsigned long size,
				struct stack_info *info)
{
	if (info)
		info->type = STACK_TYPE_UNKNOWN;

	return (on_overflow_stack(sp, size, info) ||
		on_hyp_stack(sp, size, info));
}

static int unwind_next(struct unwind_state *state)
{
	struct stack_info info;

	return unwind_next_common(state, &info, on_accessible_stack, NULL);
}

static void notrace unwind(struct unwind_state *state,
			   stack_trace_consume_fn consume_entry,
			   void *cookie)
{
	while (1) {
		int ret;

		if (!consume_entry(cookie, state->pc))
			break;
		ret = unwind_next(state);
		if (ret < 0)
			break;
	}
}

/*
 * pkvm_save_backtrace_entry - Saves a protected nVHE HYP stacktrace entry
 *
 * @arg    : index of the entry in the stacktrace buffer
 * @where  : the program counter corresponding to the stack frame
 *
 * Save the return address of a stack frame to the shared stacktrace buffer.
 * The host can access this shared buffer from EL1 to dump the backtrace.
 */
static bool pkvm_save_backtrace_entry(void *arg, unsigned long where)
{
	unsigned long *stacktrace = this_cpu_ptr(pkvm_stacktrace);
	int *idx = (int *)arg;

	/*
	 * Need 2 free slots: 1 for current entry and 1 for the
	 * delimiter.
	 */
	if (*idx > ARRAY_SIZE(pkvm_stacktrace) - 2)
		return false;

	stacktrace[*idx] = where;
	stacktrace[++*idx] = 0UL;

	return true;
}

/*
 * pkvm_save_backtrace - Saves the protected nVHE HYP stacktrace
 *
 * @fp : frame pointer at which to start the unwinding.
 * @pc : program counter at which to start the unwinding.
 *
 * Save the unwinded stack addresses to the shared stacktrace buffer.
 * The host can access this shared buffer from EL1 to dump the backtrace.
 */
static void pkvm_save_backtrace(unsigned long fp, unsigned long pc)
{
	struct unwind_state state;
	int idx = 0;

	kvm_nvhe_unwind_init(&state, fp, pc);

	unwind(&state, pkvm_save_backtrace_entry, &idx);
}
#else /* !CONFIG_PROTECTED_NVHE_STACKTRACE */
static void pkvm_save_backtrace(unsigned long fp, unsigned long pc)
{
}
#endif /* CONFIG_PROTECTED_NVHE_STACKTRACE */

/*
 * kvm_nvhe_prepare_backtrace - prepare to dump the nVHE backtrace
 *
 * @fp : frame pointer at which to start the unwinding.
 * @pc : program counter at which to start the unwinding.
 *
 * Saves the information needed by the host to dump the nVHE hypervisor
 * backtrace.
 */
void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc)
{
	if (is_protected_kvm_enabled())
		pkvm_save_backtrace(fp, pc);
	else
		hyp_prepare_backtrace(fp, pc);
}
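(Note the buffer layout this produces: pkvm_save_backtrace_entry() writes each return address and immediately follows it with a 0UL delimiter, so the shared pkvm_stacktrace buffer is always zero-terminated even if the unwind stops early; the host-side dumper relies on that terminator rather than on an explicit count.)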
@@ -34,6 +34,8 @@ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
 DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
 DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);
 
+extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
+
 static void __activate_traps(struct kvm_vcpu *vcpu)
 {
 	u64 val;
@@ -375,6 +377,10 @@ asmlinkage void __noreturn hyp_panic(void)
 		__sysreg_restore_state_nvhe(host_ctxt);
 	}
 
+	/* Prepare to dump kvm nvhe hyp stacktrace */
+	kvm_nvhe_prepare_backtrace((unsigned long)__builtin_frame_address(0),
+				   _THIS_IP_);
+
 	__hyp_do_panic(host_ctxt, spsr, elr, par);
 	unreachable();
 }
arch/arm64/kvm/stacktrace.c (new file, 218 lines)
@@ -0,0 +1,218 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * KVM nVHE hypervisor stack tracing support.
 *
 * The unwinder implementation depends on the nVHE mode:
 *
 *   1) Non-protected nVHE mode - the host can directly access the
 *      HYP stack pages and unwind the HYP stack in EL1. This saves having
 *      to allocate shared buffers for the host to read the unwinded
 *      stacktrace.
 *
 *   2) pKVM (protected nVHE) mode - the host cannot directly access
 *      the HYP memory. The stack is unwinded in EL2 and dumped to a shared
 *      buffer where the host can read and print the stacktrace.
 *
 * Copyright (C) 2022 Google LLC
 */

#include <linux/kvm.h>
#include <linux/kvm_host.h>

#include <asm/stacktrace/nvhe.h>

/*
 * kvm_nvhe_stack_kern_va - Convert KVM nVHE HYP stack addresses to a kernel VAs
 *
 * The nVHE hypervisor stack is mapped in the flexible 'private' VA range, to
 * allow for guard pages below the stack. Consequently, the fixed offset address
 * translation macros won't work here.
 *
 * The kernel VA is calculated as an offset from the kernel VA of the hypervisor
 * stack base.
 *
 * Returns true on success and updates @addr to its corresponding kernel VA;
 * otherwise returns false.
 */
static bool kvm_nvhe_stack_kern_va(unsigned long *addr,
				   enum stack_type type)
{
	struct kvm_nvhe_stacktrace_info *stacktrace_info;
	unsigned long hyp_base, kern_base, hyp_offset;

	stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);

	switch (type) {
	case STACK_TYPE_HYP:
		kern_base = (unsigned long)*this_cpu_ptr(&kvm_arm_hyp_stack_page);
		hyp_base = (unsigned long)stacktrace_info->stack_base;
		break;
	case STACK_TYPE_OVERFLOW:
		kern_base = (unsigned long)this_cpu_ptr_nvhe_sym(overflow_stack);
		hyp_base = (unsigned long)stacktrace_info->overflow_stack_base;
		break;
	default:
		return false;
	}

	hyp_offset = *addr - hyp_base;

	*addr = kern_base + hyp_offset;

	return true;
}

static bool on_overflow_stack(unsigned long sp, unsigned long size,
			      struct stack_info *info)
{
	struct kvm_nvhe_stacktrace_info *stacktrace_info
				= this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
	unsigned long low = (unsigned long)stacktrace_info->overflow_stack_base;
	unsigned long high = low + OVERFLOW_STACK_SIZE;

	return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info);
}

static bool on_hyp_stack(unsigned long sp, unsigned long size,
			 struct stack_info *info)
{
	struct kvm_nvhe_stacktrace_info *stacktrace_info
				= this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
	unsigned long low = (unsigned long)stacktrace_info->stack_base;
	unsigned long high = low + PAGE_SIZE;

	return on_stack(sp, size, low, high, STACK_TYPE_HYP, info);
}

static bool on_accessible_stack(const struct task_struct *tsk,
				unsigned long sp, unsigned long size,
				struct stack_info *info)
{
	if (info)
		info->type = STACK_TYPE_UNKNOWN;

	return (on_overflow_stack(sp, size, info) ||
		on_hyp_stack(sp, size, info));
}

static int unwind_next(struct unwind_state *state)
{
	struct stack_info info;

	return unwind_next_common(state, &info, on_accessible_stack,
				  kvm_nvhe_stack_kern_va);
}

static void unwind(struct unwind_state *state,
		   stack_trace_consume_fn consume_entry, void *cookie)
{
	while (1) {
		int ret;

		if (!consume_entry(cookie, state->pc))
			break;
		ret = unwind_next(state);
		if (ret < 0)
			break;
	}
}

/*
 * kvm_nvhe_dump_backtrace_entry - Symbolize and print an nVHE backtrace entry
 *
 * @arg    : the hypervisor offset, used for address translation
 * @where  : the program counter corresponding to the stack frame
 */
static bool kvm_nvhe_dump_backtrace_entry(void *arg, unsigned long where)
{
	unsigned long va_mask = GENMASK_ULL(vabits_actual - 1, 0);
	unsigned long hyp_offset = (unsigned long)arg;

	/* Mask tags and convert to kern addr */
	where = (where & va_mask) + hyp_offset;
	kvm_err(" [<%016lx>] %pB\n", where, (void *)(where + kaslr_offset()));

	return true;
}

static void kvm_nvhe_dump_backtrace_start(void)
{
	kvm_err("nVHE call trace:\n");
}

static void kvm_nvhe_dump_backtrace_end(void)
{
	kvm_err("---[ end nVHE call trace ]---\n");
}

/*
 * hyp_dump_backtrace - Dump the non-protected nVHE backtrace.
 *
 * @hyp_offset : hypervisor offset, used for address translation.
 *
 * The host can directly access HYP stack pages in non-protected
 * mode, so the unwinding is done directly from EL1. This removes
 * the need for shared buffers between host and hypervisor for
 * the stacktrace.
 */
static void hyp_dump_backtrace(unsigned long hyp_offset)
{
	struct kvm_nvhe_stacktrace_info *stacktrace_info;
	struct unwind_state state;

	stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);

	kvm_nvhe_unwind_init(&state, stacktrace_info->fp, stacktrace_info->pc);

	kvm_nvhe_dump_backtrace_start();
	unwind(&state, kvm_nvhe_dump_backtrace_entry, (void *)hyp_offset);
	kvm_nvhe_dump_backtrace_end();
}

#ifdef CONFIG_PROTECTED_NVHE_STACKTRACE
DECLARE_KVM_NVHE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)],
			 pkvm_stacktrace);

/*
 * pkvm_dump_backtrace - Dump the protected nVHE HYP backtrace.
 *
 * @hyp_offset : hypervisor offset, used for address translation.
 *
 * Dumping of the pKVM HYP backtrace is done by reading the
 * stack addresses from the shared stacktrace buffer, since the
 * host cannot directly access hypervisor memory in protected
 * mode.
 */
static void pkvm_dump_backtrace(unsigned long hyp_offset)
{
	unsigned long *stacktrace
		= (unsigned long *) this_cpu_ptr_nvhe_sym(pkvm_stacktrace);
	int i;

	kvm_nvhe_dump_backtrace_start();
	/* The saved stacktrace is terminated by a null entry */
	for (i = 0;
	     i < ARRAY_SIZE(kvm_nvhe_sym(pkvm_stacktrace)) && stacktrace[i];
	     i++)
		kvm_nvhe_dump_backtrace_entry((void *)hyp_offset, stacktrace[i]);
	kvm_nvhe_dump_backtrace_end();
}
#else /* !CONFIG_PROTECTED_NVHE_STACKTRACE */
static void pkvm_dump_backtrace(unsigned long hyp_offset)
{
	kvm_err("Cannot dump pKVM nVHE stacktrace: !CONFIG_PROTECTED_NVHE_STACKTRACE\n");
}
#endif /* CONFIG_PROTECTED_NVHE_STACKTRACE */

/*
 * kvm_nvhe_dump_backtrace - Dump KVM nVHE hypervisor backtrace.
 *
 * @hyp_offset : hypervisor offset, used for address translation.
 */
void kvm_nvhe_dump_backtrace(unsigned long hyp_offset)
{
	if (is_protected_kvm_enabled())
		pkvm_dump_backtrace(hyp_offset);
	else
		hyp_dump_backtrace(hyp_offset);
}
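(With both pieces in place, a hypervisor panic produces output of the form "nVHE call trace:" followed by one " [<address>] symbol" line per saved frame, printed via kvm_err() and %pB, and a closing "---[ end nVHE call trace ]---" marker, in either nVHE mode.)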