forked from Minki/linux
panic, kexec: make __crash_kexec() NMI safe
Attempting to get a crash dump out of a debug PREEMPT_RT kernel via an NMI panic() doesn't work. The cause of that lies in the PREEMPT_RT definition of mutex_trylock(): if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task())) return 0; This prevents an nmi_panic() from executing the main body of __crash_kexec() which does the actual kexec into the kdump kernel. The warning and return are explained by:6ce47fd961
("rtmutex: Warn if trylock is called from hard/softirq context") [...] The reasons for this are: 1) There is a potential deadlock in the slowpath 2) Another cpu which blocks on the rtmutex will boost the task which allegedly locked the rtmutex, but that cannot work because the hard/softirq context borrows the task context. Furthermore, grabbing the lock isn't NMI safe, so do away with kexec_mutex and replace it with an atomic variable. This is somewhat overzealous as *some* callsites could keep using a mutex (e.g. the sysfs-facing ones like crash_shrink_memory()), but this has the benefit of involving a single unified lock and preventing any future NMI-related surprises. Tested by triggering NMI panics via: $ echo 1 > /proc/sys/kernel/panic_on_unrecovered_nmi $ echo 1 > /proc/sys/kernel/unknown_nmi_panic $ echo 1 > /proc/sys/kernel/panic $ ipmitool power diag Link: https://lkml.kernel.org/r/20220630223258.4144112-3-vschneid@redhat.com Fixes:6ce47fd961
("rtmutex: Warn if trylock is called from hard/softirq context") Signed-off-by: Valentin Schneider <vschneid@redhat.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Baoquan He <bhe@redhat.com> Cc: "Eric W . Biederman" <ebiederm@xmission.com> Cc: Juri Lelli <jlelli@redhat.com> Cc: Luis Claudio R. Goncalves <lgoncalv@redhat.com> Cc: Miaohe Lin <linmiaohe@huawei.com> Cc: Petr Mladek <pmladek@suse.com> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
7bb5da0d49
commit
05c6257433
@ -93,13 +93,10 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Because we write directly to the reserved memory region when loading
|
* Because we write directly to the reserved memory region when loading
|
||||||
* crash kernels we need a mutex here to prevent multiple crash kernels
|
* crash kernels we need a serialization here to prevent multiple crash
|
||||||
* from attempting to load simultaneously, and to prevent a crash kernel
|
* kernels from attempting to load simultaneously.
|
||||||
* from loading over the top of a in use crash kernel.
|
|
||||||
*
|
|
||||||
* KISS: always take the mutex.
|
|
||||||
*/
|
*/
|
||||||
if (!mutex_trylock(&kexec_mutex))
|
if (!kexec_trylock())
|
||||||
return -EBUSY;
|
return -EBUSY;
|
||||||
|
|
||||||
if (flags & KEXEC_ON_CRASH) {
|
if (flags & KEXEC_ON_CRASH) {
|
||||||
@ -165,7 +162,7 @@ out:
|
|||||||
|
|
||||||
kimage_free(image);
|
kimage_free(image);
|
||||||
out_unlock:
|
out_unlock:
|
||||||
mutex_unlock(&kexec_mutex);
|
kexec_unlock();
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -46,7 +46,7 @@
|
|||||||
#include <crypto/hash.h>
|
#include <crypto/hash.h>
|
||||||
#include "kexec_internal.h"
|
#include "kexec_internal.h"
|
||||||
|
|
||||||
DEFINE_MUTEX(kexec_mutex);
|
atomic_t __kexec_lock = ATOMIC_INIT(0);
|
||||||
|
|
||||||
/* Per cpu memory for storing cpu states in case of system crash. */
|
/* Per cpu memory for storing cpu states in case of system crash. */
|
||||||
note_buf_t __percpu *crash_notes;
|
note_buf_t __percpu *crash_notes;
|
||||||
@ -959,7 +959,7 @@ late_initcall(kexec_core_sysctl_init);
|
|||||||
*/
|
*/
|
||||||
void __noclone __crash_kexec(struct pt_regs *regs)
|
void __noclone __crash_kexec(struct pt_regs *regs)
|
||||||
{
|
{
|
||||||
/* Take the kexec_mutex here to prevent sys_kexec_load
|
/* Take the kexec_lock here to prevent sys_kexec_load
|
||||||
* running on one cpu from replacing the crash kernel
|
* running on one cpu from replacing the crash kernel
|
||||||
* we are using after a panic on a different cpu.
|
* we are using after a panic on a different cpu.
|
||||||
*
|
*
|
||||||
@ -967,7 +967,7 @@ void __noclone __crash_kexec(struct pt_regs *regs)
|
|||||||
* of memory the xchg(&kexec_crash_image) would be
|
* of memory the xchg(&kexec_crash_image) would be
|
||||||
* sufficient. But since I reuse the memory...
|
* sufficient. But since I reuse the memory...
|
||||||
*/
|
*/
|
||||||
if (mutex_trylock(&kexec_mutex)) {
|
if (kexec_trylock()) {
|
||||||
if (kexec_crash_image) {
|
if (kexec_crash_image) {
|
||||||
struct pt_regs fixed_regs;
|
struct pt_regs fixed_regs;
|
||||||
|
|
||||||
@ -976,7 +976,7 @@ void __noclone __crash_kexec(struct pt_regs *regs)
|
|||||||
machine_crash_shutdown(&fixed_regs);
|
machine_crash_shutdown(&fixed_regs);
|
||||||
machine_kexec(kexec_crash_image);
|
machine_kexec(kexec_crash_image);
|
||||||
}
|
}
|
||||||
mutex_unlock(&kexec_mutex);
|
kexec_unlock();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
STACK_FRAME_NON_STANDARD(__crash_kexec);
|
STACK_FRAME_NON_STANDARD(__crash_kexec);
|
||||||
@ -1008,13 +1008,13 @@ ssize_t crash_get_memory_size(void)
|
|||||||
{
|
{
|
||||||
ssize_t size = 0;
|
ssize_t size = 0;
|
||||||
|
|
||||||
if (!mutex_trylock(&kexec_mutex))
|
if (!kexec_trylock())
|
||||||
return -EBUSY;
|
return -EBUSY;
|
||||||
|
|
||||||
if (crashk_res.end != crashk_res.start)
|
if (crashk_res.end != crashk_res.start)
|
||||||
size = resource_size(&crashk_res);
|
size = resource_size(&crashk_res);
|
||||||
|
|
||||||
mutex_unlock(&kexec_mutex);
|
kexec_unlock();
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1025,7 +1025,7 @@ int crash_shrink_memory(unsigned long new_size)
|
|||||||
unsigned long old_size;
|
unsigned long old_size;
|
||||||
struct resource *ram_res;
|
struct resource *ram_res;
|
||||||
|
|
||||||
if (!mutex_trylock(&kexec_mutex))
|
if (!kexec_trylock())
|
||||||
return -EBUSY;
|
return -EBUSY;
|
||||||
|
|
||||||
if (kexec_crash_image) {
|
if (kexec_crash_image) {
|
||||||
@ -1064,7 +1064,7 @@ int crash_shrink_memory(unsigned long new_size)
|
|||||||
insert_resource(&iomem_resource, ram_res);
|
insert_resource(&iomem_resource, ram_res);
|
||||||
|
|
||||||
unlock:
|
unlock:
|
||||||
mutex_unlock(&kexec_mutex);
|
kexec_unlock();
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1136,7 +1136,7 @@ int kernel_kexec(void)
|
|||||||
{
|
{
|
||||||
int error = 0;
|
int error = 0;
|
||||||
|
|
||||||
if (!mutex_trylock(&kexec_mutex))
|
if (!kexec_trylock())
|
||||||
return -EBUSY;
|
return -EBUSY;
|
||||||
if (!kexec_image) {
|
if (!kexec_image) {
|
||||||
error = -EINVAL;
|
error = -EINVAL;
|
||||||
@ -1212,6 +1212,6 @@ int kernel_kexec(void)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
Unlock:
|
Unlock:
|
||||||
mutex_unlock(&kexec_mutex);
|
kexec_unlock();
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
@ -339,7 +339,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
|
|||||||
|
|
||||||
image = NULL;
|
image = NULL;
|
||||||
|
|
||||||
if (!mutex_trylock(&kexec_mutex))
|
if (!kexec_trylock())
|
||||||
return -EBUSY;
|
return -EBUSY;
|
||||||
|
|
||||||
dest_image = &kexec_image;
|
dest_image = &kexec_image;
|
||||||
@ -411,7 +411,7 @@ out:
|
|||||||
if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image)
|
if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image)
|
||||||
arch_kexec_protect_crashkres();
|
arch_kexec_protect_crashkres();
|
||||||
|
|
||||||
mutex_unlock(&kexec_mutex);
|
kexec_unlock();
|
||||||
kimage_free(image);
|
kimage_free(image);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -13,7 +13,20 @@ void kimage_terminate(struct kimage *image);
|
|||||||
int kimage_is_destination_range(struct kimage *image,
|
int kimage_is_destination_range(struct kimage *image,
|
||||||
unsigned long start, unsigned long end);
|
unsigned long start, unsigned long end);
|
||||||
|
|
||||||
extern struct mutex kexec_mutex;
|
/*
|
||||||
|
* Whatever is used to serialize accesses to the kexec_crash_image needs to be
|
||||||
|
* NMI safe, as __crash_kexec() can happen during nmi_panic(), so here we use a
|
||||||
|
* "simple" atomic variable that is acquired with a cmpxchg().
|
||||||
|
*/
|
||||||
|
extern atomic_t __kexec_lock;
|
||||||
|
static inline bool kexec_trylock(void)
|
||||||
|
{
|
||||||
|
return atomic_cmpxchg_acquire(&__kexec_lock, 0, 1) == 0;
|
||||||
|
}
|
||||||
|
static inline void kexec_unlock(void)
|
||||||
|
{
|
||||||
|
atomic_set_release(&__kexec_lock, 0);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_KEXEC_FILE
|
#ifdef CONFIG_KEXEC_FILE
|
||||||
#include <linux/purgatory.h>
|
#include <linux/purgatory.h>
|
||||||
|
Loading…
Reference in New Issue
Block a user