x86/mce: Carve out the crashing_cpu check
Carve out the rendezvous handler timeout avoidance check into a separate function in order to simplify the #MC handler.

No functional changes.

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/20180622095428.626-4-bp@alien8.de
This commit is contained in:
parent
bc39f01020
commit
d3d6923cd1
@ -1104,6 +1104,34 @@ static void mce_unmap_kpfn(unsigned long pfn)
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* Cases where we avoid rendezvous handler timeout:
|
||||
* 1) If this CPU is offline.
|
||||
*
|
||||
* 2) If crashing_cpu was set, e.g. we're entering kdump and we need to
|
||||
* skip those CPUs which remain looping in the 1st kernel - see
|
||||
* crash_nmi_callback().
|
||||
*
|
||||
* Note: there still is a small window between kexec-ing and the new,
|
||||
* kdump kernel establishing a new #MC handler where a broadcasted MCE
|
||||
* might not get handled properly.
|
||||
*/
|
||||
static bool __mc_check_crashing_cpu(int cpu)
|
||||
{
|
||||
if (cpu_is_offline(cpu) ||
|
||||
(crashing_cpu != -1 && crashing_cpu != cpu)) {
|
||||
u64 mcgstatus;
|
||||
|
||||
mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
|
||||
if (mcgstatus & MCG_STATUS_RIPV) {
|
||||
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* The actual machine check handler. This only handles real
|
||||
* exceptions when something got corrupted coming in through int 18.
|
||||
@ -1118,60 +1146,42 @@ static void mce_unmap_kpfn(unsigned long pfn)
|
||||
*/
|
||||
void do_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
|
||||
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
|
||||
struct mca_config *cfg = &mca_cfg;
|
||||
int cpu = smp_processor_id();
|
||||
char *msg = "Unknown";
|
||||
struct mce m, *final;
|
||||
int i;
|
||||
int worst = 0;
|
||||
int severity;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* Establish sequential order between the CPUs entering the machine
|
||||
* check handler.
|
||||
*/
|
||||
int order = -1;
|
||||
|
||||
/*
|
||||
* If no_way_out gets set, there is no safe way to recover from this
|
||||
* MCE. If mca_cfg.tolerant is cranked up, we'll try anyway.
|
||||
*/
|
||||
int no_way_out = 0;
|
||||
|
||||
/*
|
||||
* If kill_it gets set, there might be a way to recover from this
|
||||
* error.
|
||||
*/
|
||||
int kill_it = 0;
|
||||
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
|
||||
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
|
||||
char *msg = "Unknown";
|
||||
|
||||
/*
|
||||
* MCEs are always local on AMD. Same is determined by MCG_STATUS_LMCES
|
||||
* on Intel.
|
||||
*/
|
||||
int lmce = 1;
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
/*
|
||||
* Cases where we avoid rendezvous handler timeout:
|
||||
* 1) If this CPU is offline.
|
||||
*
|
||||
* 2) If crashing_cpu was set, e.g. we're entering kdump and we need to
|
||||
* skip those CPUs which remain looping in the 1st kernel - see
|
||||
* crash_nmi_callback().
|
||||
*
|
||||
* Note: there still is a small window between kexec-ing and the new,
|
||||
* kdump kernel establishing a new #MC handler where a broadcasted MCE
|
||||
* might not get handled properly.
|
||||
*/
|
||||
if (cpu_is_offline(cpu) ||
|
||||
(crashing_cpu != -1 && crashing_cpu != cpu)) {
|
||||
u64 mcgstatus;
|
||||
|
||||
mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
|
||||
if (mcgstatus & MCG_STATUS_RIPV) {
|
||||
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (__mc_check_crashing_cpu(cpu))
|
||||
return;
|
||||
|
||||
ist_enter(regs);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user