forked from Minki/linux
x86/mce: Make the MCE notifier a blocking one
The NFIT MCE handler callback (for handling media errors on NVDIMMs)
takes a mutex to add the location of a memory error to a list. But since
the notifier call chain for machine checks (x86_mce_decoder_chain) is
atomic, we get a lockdep splat like:
BUG: sleeping function called from invalid context at kernel/locking/mutex.c:620
in_atomic(): 1, irqs_disabled(): 0, pid: 4, name: kworker/0:0
[..]
Call Trace:
dump_stack
___might_sleep
__might_sleep
mutex_lock_nested
? __lock_acquire
nfit_handle_mce
notifier_call_chain
atomic_notifier_call_chain
? atomic_notifier_call_chain
mce_gen_pool_process
Convert the notifier to a blocking one which gets to run only in process
context.
Boris: remove the notifier call in atomic context in print_mce(). For
now, let's print the MCE on the atomic path so that we can make sure
they go out and get logged at least.
Fixes: 6839a6d96f
("nfit: do an ARS scrub on hitting a latent media error")
Reported-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Acked-by: Tony Luck <tony.luck@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: x86-ml <x86@kernel.org>
Cc: <stable@vger.kernel.org>
Link: http://lkml.kernel.org/r/20170411224457.24777-1-vishal.l.verma@intel.com
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
This commit is contained in:
parent
4f7d029b9b
commit
0dc9c639e6
@ -85,7 +85,7 @@ void mce_gen_pool_process(struct work_struct *__unused)
|
||||
head = llist_reverse_order(head);
|
||||
llist_for_each_entry_safe(node, tmp, head, llnode) {
|
||||
mce = &node->mce;
|
||||
atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
|
||||
blocking_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
|
||||
gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node));
|
||||
}
|
||||
}
|
||||
|
@ -13,7 +13,7 @@ enum severity_level {
|
||||
MCE_PANIC_SEVERITY,
|
||||
};
|
||||
|
||||
extern struct atomic_notifier_head x86_mce_decoder_chain;
|
||||
extern struct blocking_notifier_head x86_mce_decoder_chain;
|
||||
|
||||
#define ATTR_LEN 16
|
||||
#define INITIAL_CHECK_INTERVAL 5 * 60 /* 5 minutes */
|
||||
|
@ -123,7 +123,7 @@ static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
|
||||
* CPU/chipset specific EDAC code can register a notifier call here to print
|
||||
* MCE errors in a human-readable form.
|
||||
*/
|
||||
ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
|
||||
BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain);
|
||||
|
||||
/* Do initial initialization of a struct mce */
|
||||
void mce_setup(struct mce *m)
|
||||
@ -220,7 +220,7 @@ void mce_register_decode_chain(struct notifier_block *nb)
|
||||
|
||||
WARN_ON(nb->priority > MCE_PRIO_LOWEST && nb->priority < MCE_PRIO_EDAC);
|
||||
|
||||
atomic_notifier_chain_register(&x86_mce_decoder_chain, nb);
|
||||
blocking_notifier_chain_register(&x86_mce_decoder_chain, nb);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mce_register_decode_chain);
|
||||
|
||||
@ -228,7 +228,7 @@ void mce_unregister_decode_chain(struct notifier_block *nb)
|
||||
{
|
||||
atomic_dec(&num_notifiers);
|
||||
|
||||
atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
|
||||
blocking_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
|
||||
|
||||
@ -321,18 +321,7 @@ static void __print_mce(struct mce *m)
|
||||
|
||||
static void print_mce(struct mce *m)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
__print_mce(m);
|
||||
|
||||
/*
|
||||
* Print out human-readable details about the MCE error,
|
||||
* (if the CPU has an implementation for that)
|
||||
*/
|
||||
ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
|
||||
if (ret == NOTIFY_STOP)
|
||||
return;
|
||||
|
||||
pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user