mirror of
https://github.com/torvalds/linux.git
synced 2024-11-24 05:02:12 +00:00
x86/mce: Grade uncorrected errors for SMCA-enabled systems
For upcoming processors with the Scalable MCA feature, we need to check the "succor" CPUID bit and the TCC bit in the MCx_STATUS register in order to grade an MCE's severity. Signed-off-by: Aravind Gopalakrishnan <Aravind.Gopalakrishnan@amd.com> Signed-off-by: Yazen Ghannam <Yazen.Ghannam@amd.com> [ Simplified code flow, shortened comments. ] Signed-off-by: Borislav Petkov <bp@suse.de> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Aravind Gopalakrishnan <aravindksg.lkml@gmail.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Tony Luck <tony.luck@intel.com> Cc: linux-edac <linux-edac@vger.kernel.org> Link: http://lkml.kernel.org/r/1459886686-13977-3-git-send-email-Yazen.Ghannam@amd.com Link: http://lkml.kernel.org/r/1462019637-16474-3-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
10001d91aa
commit
6bda529ec4
@ -204,6 +204,33 @@ static int error_context(struct mce *m)
|
|||||||
return IN_KERNEL;
|
return IN_KERNEL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int mce_severity_amd_smca(struct mce *m, int err_ctx)
|
||||||
|
{
|
||||||
|
u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
|
||||||
|
u32 low, high;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We need to look at the following bits:
|
||||||
|
* - "succor" bit (data poisoning support), and
|
||||||
|
* - TCC bit (Task Context Corrupt)
|
||||||
|
* in MCi_STATUS to determine error severity.
|
||||||
|
*/
|
||||||
|
if (!mce_flags.succor)
|
||||||
|
return MCE_PANIC_SEVERITY;
|
||||||
|
|
||||||
|
if (rdmsr_safe(addr, &low, &high))
|
||||||
|
return MCE_PANIC_SEVERITY;
|
||||||
|
|
||||||
|
/* TCC (Task context corrupt). If set and if IN_KERNEL, panic. */
|
||||||
|
if ((low & MCI_CONFIG_MCAX) &&
|
||||||
|
(m->status & MCI_STATUS_TCC) &&
|
||||||
|
(err_ctx == IN_KERNEL))
|
||||||
|
return MCE_PANIC_SEVERITY;
|
||||||
|
|
||||||
|
/* ...otherwise invoke hwpoison handler. */
|
||||||
|
return MCE_AR_SEVERITY;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* See AMD Error Scope Hierarchy table in a newer BKDG. For example
|
* See AMD Error Scope Hierarchy table in a newer BKDG. For example
|
||||||
* 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
|
* 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
|
||||||
@ -225,6 +252,9 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc
|
|||||||
* to at least kill process to prolong system operation.
|
* to at least kill process to prolong system operation.
|
||||||
*/
|
*/
|
||||||
if (mce_flags.overflow_recov) {
|
if (mce_flags.overflow_recov) {
|
||||||
|
if (mce_flags.smca)
|
||||||
|
return mce_severity_amd_smca(m, ctx);
|
||||||
|
|
||||||
/* software can try to contain */
|
/* software can try to contain */
|
||||||
if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL))
|
if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL))
|
||||||
return MCE_PANIC_SEVERITY;
|
return MCE_PANIC_SEVERITY;
|
||||||
|
Loading…
Reference in New Issue
Block a user