panic, kexec: make __crash_kexec() NMI safe

Attempting to get a crash dump out of a debug PREEMPT_RT kernel via an NMI panic() doesn't work. The cause of that lies in the PREEMPT_RT definition of mutex_trylock(): if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task())) return 0; This prevents an nmi_panic() from executing the main body of __crash_kexec() which does the actual kexec into the kdump kernel. The warning and return are explained by: 6ce47fd961fa ("rtmutex: Warn if trylock is called from hard/softirq context") [...] The reasons for this are: 1) There is a potential deadlock in the slowpath 2) Another cpu which blocks on the rtmutex will boost the task which allegedly locked the rtmutex, but that cannot work because the hard/softirq context borrows the task context. Furthermore, grabbing the lock isn't NMI safe, so do away with kexec_mutex and replace it with an atomic variable. This is somewhat overzealous as *some* callsites could keep using a mutex (e.g. the sysfs-facing ones like crash_shrink_memory()), but this has the benefit of involving a single unified lock and preventing any future NMI-related surprises. Tested by triggering NMI panics via: $ echo 1 > /proc/sys/kernel/panic_on_unrecovered_nmi $ echo 1 > /proc/sys/kernel/unknown_nmi_panic $ echo 1 > /proc/sys/kernel/panic $ ipmitool power diag Link: https://lkml.kernel.org/r/20220630223258.4144112-3-vschneid@redhat.com Fixes: 6ce47fd961fa ("rtmutex: Warn if trylock is called from hard/softirq context") Signed-off-by: Valentin Schneider <vschneid@redhat.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Baoquan He <bhe@redhat.com> Cc: "Eric W . Biederman" <ebiederm@xmission.com> Cc: Juri Lelli <jlelli@redhat.com> Cc: Luis Claudio R. Goncalves <lgoncalv@redhat.com> Cc: Miaohe Lin <linmiaohe@huawei.com> Cc: Petr Mladek <pmladek@suse.com> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
author: Valentin Schneider <vschneid@redhat.com> 2022-06-30 23:32:58 +0100
committer: Andrew Morton <akpm@linux-foundation.org> 2022-09-11 21:55:06 -0700
commit: 05c6257433b7212f07a7e53479a8ab038fc1666a (patch)
tree: 8c44b36c6a1c9585694e877f22451d79b5f2ec79 /kernel/kexec_core.c
parent: 7bb5da0d490b2d836c5218f5186ee588d2145310 (diff)
1 files changed, 10 insertions, 10 deletions
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 8fa4eeb95b30..5ca4d40c9ec1 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -46,7 +46,7 @@
 #include <crypto/hash.h>
 #include "kexec_internal.h"
 
-DEFINE_MUTEX(kexec_mutex);
+atomic_t __kexec_lock = ATOMIC_INIT(0);
 
 /* Per cpu memory for storing cpu states in case of system crash. */
 note_buf_t __percpu *crash_notes;
@@ -959,7 +959,7 @@ late_initcall(kexec_core_sysctl_init);
  */
 void __noclone __crash_kexec(struct pt_regs *regs)
 {
-	/* Take the kexec_mutex here to prevent sys_kexec_load
+	/* Take the kexec_lock here to prevent sys_kexec_load
 	 * running on one cpu from replacing the crash kernel
 	 * we are using after a panic on a different cpu.
 	 *
@@ -967,7 +967,7 @@ void __noclone __crash_kexec(struct pt_regs *regs)
 	 * of memory the xchg(&kexec_crash_image) would be
 	 * sufficient.  But since I reuse the memory...
 	 */
-	if (mutex_trylock(&kexec_mutex)) {
+	if (kexec_trylock()) {
 		if (kexec_crash_image) {
 			struct pt_regs fixed_regs;
 
@@ -976,7 +976,7 @@ void __noclone __crash_kexec(struct pt_regs *regs)
 			machine_crash_shutdown(&fixed_regs);
 			machine_kexec(kexec_crash_image);
 		}
-		mutex_unlock(&kexec_mutex);
+		kexec_unlock();
 	}
 }
 STACK_FRAME_NON_STANDARD(__crash_kexec);
@@ -1008,13 +1008,13 @@ ssize_t crash_get_memory_size(void)
 {
 	ssize_t size = 0;
 
-	if (!mutex_trylock(&kexec_mutex))
+	if (!kexec_trylock())
 		return -EBUSY;
 
 	if (crashk_res.end != crashk_res.start)
 		size = resource_size(&crashk_res);
 
-	mutex_unlock(&kexec_mutex);
+	kexec_unlock();
 	return size;
 }
 
@@ -1025,7 +1025,7 @@ int crash_shrink_memory(unsigned long new_size)
 	unsigned long old_size;
 	struct resource *ram_res;
 
-	if (!mutex_trylock(&kexec_mutex))
+	if (!kexec_trylock())
 		return -EBUSY;
 
 	if (kexec_crash_image) {
@@ -1064,7 +1064,7 @@ int crash_shrink_memory(unsigned long new_size)
 	insert_resource(&iomem_resource, ram_res);
 
 unlock:
-	mutex_unlock(&kexec_mutex);
+	kexec_unlock();
 	return ret;
 }
 
@@ -1136,7 +1136,7 @@ int kernel_kexec(void)
 {
 	int error = 0;
 
-	if (!mutex_trylock(&kexec_mutex))
+	if (!kexec_trylock())
 		return -EBUSY;
 	if (!kexec_image) {
 		error = -EINVAL;
@@ -1212,6 +1212,6 @@ int kernel_kexec(void)
 #endif
 
  Unlock:
-	mutex_unlock(&kexec_mutex);
+	kexec_unlock();
 	return error;
 }
author	Valentin Schneider <vschneid@redhat.com>	2022-06-30 23:32:58 +0100
committer	Andrew Morton <akpm@linux-foundation.org>	2022-09-11 21:55:06 -0700
commit	05c6257433b7212f07a7e53479a8ab038fc1666a (patch)
tree	8c44b36c6a1c9585694e877f22451d79b5f2ec79 /kernel/kexec_core.c
parent	7bb5da0d490b2d836c5218f5186ee588d2145310 (diff)