forked from Minki/linux
RAS: Add support for deferred errors on AMD (Aravind Gopalakrishnan)
This is an important RAS feature which adds hardware support for poisoned data. That means roughly that the hardware marks data which it has detected as corrupted but wasn't able to correct, as poisoned data and raises an APIC interrupt to signal that in the form of a deferred error. It is the OS's responsibility then to take proper recovery action and thus prolong system lifetime as far as possible. Misc cleanups on top. (Borislav Petkov) -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQIcBAABAgAGBQJVUF2sAAoJEBLB8Bhh3lVKXZ4QAJ3UdVM1/TuqAsZ7+jLkb7BZ BRyWgv31CcX5fM1D0vV+6K+4GPPsLAtNVYy2G+LauFX1bfE1f9ExWKlMzp45h1sS xaNLDhIIP+aE4kD1J7mlNc0WlF0ghlfX+iaGc7lI+j3o2Ydlxm15Pt6Te9hDI7en C1NOWrkJ0+BJv48bPeJ835CLu+DZ6xktWdJ1In88PNUA9YiTj12/nhMKkaGbh3zv Ep3FCFD/tHcecRK/rVmSTE3cG50SLKtndh/Kl7s1wYhgw6ERyg3x/t8QefZkuU0Q 6fbetgYS9VvpewViAuNemoCHY5qxBNHPLsn6vwhluzlelW1CcgINU8LHcGZiaLmd DYVM9bHfSrKrHhH0M55XPn9RQSZpA+cTep3IyQzCK+jmLBiqrH3bMIRHjNQRUOLy DsGLm51tQqaMmnhDma8mMjF7LN+iBqNxXeqvkxQxQBE5NVLXHoaajOgUuj/N59WE FEFa65rmTrmsmgjAn9BPBk0zeoyQaYFKCLhENB19Vlt/4YoY/vHvzFYJNEcQT5ZU kuM8/hSEqeYZH4ZjJ8i9zKVado7z6pRQqV/lwRJ27tuXy9+9y6pV+ewmk8gCjQe4 gvySlHbIlfO5geF59GYenp4ll5CdZFvIJuwhybDBZhk3C7M2M7X/xgHnJnprza6j YVzOp7Jj2aeHGImqGL49 =3e5/ -----END PGP SIGNATURE----- Merge tag 'ras_for_4.2' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras into x86/ras Pull RAS updates from Borislav Petkov: - RAS: Add support for deferred errors on AMD (Aravind Gopalakrishnan) This is an important RAS feature which adds hardware support for poisoned data. That means roughly that the hardware marks data which it has detected as corrupted but wasn't able to correct, as poisoned data and raises an APIC interrupt to signal that in the form of a deferred error. It is the OS's responsibility then to take proper recovery action and thus prolong system lifetime as far as possible. - Misc cleanups on top. (Borislav Petkov) Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
commit
4ddf2a1785
@ -50,4 +50,7 @@ BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
|
||||
BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_MCE_AMD
|
||||
BUILD_INTERRUPT(deferred_error_interrupt, DEFERRED_ERROR_VECTOR)
|
||||
#endif
|
||||
#endif
|
||||
|
@ -33,6 +33,9 @@ typedef struct {
|
||||
#ifdef CONFIG_X86_MCE_THRESHOLD
|
||||
unsigned int irq_threshold_count;
|
||||
#endif
|
||||
#ifdef CONFIG_X86_MCE_AMD
|
||||
unsigned int irq_deferred_error_count;
|
||||
#endif
|
||||
#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
|
||||
unsigned int irq_hv_callback_count;
|
||||
#endif
|
||||
|
@ -73,6 +73,7 @@ extern asmlinkage void invalidate_interrupt31(void);
|
||||
extern asmlinkage void irq_move_cleanup_interrupt(void);
|
||||
extern asmlinkage void reboot_interrupt(void);
|
||||
extern asmlinkage void threshold_interrupt(void);
|
||||
extern asmlinkage void deferred_error_interrupt(void);
|
||||
|
||||
extern asmlinkage void call_function_interrupt(void);
|
||||
extern asmlinkage void call_function_single_interrupt(void);
|
||||
@ -87,6 +88,7 @@ extern void trace_spurious_interrupt(void);
|
||||
extern void trace_thermal_interrupt(void);
|
||||
extern void trace_reschedule_interrupt(void);
|
||||
extern void trace_threshold_interrupt(void);
|
||||
extern void trace_deferred_error_interrupt(void);
|
||||
extern void trace_call_function_interrupt(void);
|
||||
extern void trace_call_function_single_interrupt(void);
|
||||
#define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt
|
||||
|
@ -102,21 +102,22 @@
|
||||
*/
|
||||
#define X86_PLATFORM_IPI_VECTOR 0xf7
|
||||
|
||||
/* Vector for KVM to deliver posted interrupt IPI */
|
||||
#ifdef CONFIG_HAVE_KVM
|
||||
#define POSTED_INTR_VECTOR 0xf2
|
||||
#endif
|
||||
|
||||
/*
|
||||
* IRQ work vector:
|
||||
*/
|
||||
#define IRQ_WORK_VECTOR 0xf6
|
||||
|
||||
#define UV_BAU_MESSAGE 0xf5
|
||||
#define DEFERRED_ERROR_VECTOR 0xf4
|
||||
|
||||
/* Vector on which hypervisor callbacks will be delivered */
|
||||
#define HYPERVISOR_CALLBACK_VECTOR 0xf3
|
||||
|
||||
/* Vector for KVM to deliver posted interrupt IPI */
|
||||
#ifdef CONFIG_HAVE_KVM
|
||||
#define POSTED_INTR_VECTOR 0xf2
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Local APIC timer IRQ vector is on a different priority level,
|
||||
* to work around the 'lost local interrupt if more than 2 IRQ
|
||||
|
@ -117,8 +117,19 @@ struct mca_config {
|
||||
};
|
||||
|
||||
struct mce_vendor_flags {
|
||||
__u64 overflow_recov : 1, /* cpuid_ebx(80000007) */
|
||||
__reserved_0 : 63;
|
||||
/*
|
||||
* overflow recovery cpuid bit indicates that overflow
|
||||
* conditions are not fatal
|
||||
*/
|
||||
__u64 overflow_recov : 1,
|
||||
|
||||
/*
|
||||
* SUCCOR stands for S/W UnCorrectable error COntainment
|
||||
* and Recovery. It indicates support for data poisoning
|
||||
* in HW and deferred error interrupts.
|
||||
*/
|
||||
succor : 1,
|
||||
__reserved_0 : 62;
|
||||
};
|
||||
extern struct mce_vendor_flags mce_flags;
|
||||
|
||||
@ -223,6 +234,9 @@ void do_machine_check(struct pt_regs *, long);
|
||||
extern void (*mce_threshold_vector)(void);
|
||||
extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
|
||||
|
||||
/* Deferred error interrupt handler */
|
||||
extern void (*deferred_error_int_vector)(void);
|
||||
|
||||
/*
|
||||
* Thermal handler
|
||||
*/
|
||||
|
@ -100,6 +100,12 @@ DEFINE_IRQ_VECTOR_EVENT(call_function_single);
|
||||
*/
|
||||
DEFINE_IRQ_VECTOR_EVENT(threshold_apic);
|
||||
|
||||
/*
|
||||
* deferred_error_apic - called when entering/exiting a deferred error apic interrupt
|
||||
* vector handler
|
||||
*/
|
||||
DEFINE_IRQ_VECTOR_EVENT(deferred_error_apic);
|
||||
|
||||
/*
|
||||
* thermal_apic - called when entering/exiting a thermal apic interrupt
|
||||
* vector handler
|
||||
|
@ -108,7 +108,8 @@ extern int panic_on_unrecovered_nmi;
|
||||
void math_emulate(struct math_emu_info *);
|
||||
#ifndef CONFIG_X86_32
|
||||
asmlinkage void smp_thermal_interrupt(void);
|
||||
asmlinkage void mce_threshold_interrupt(void);
|
||||
asmlinkage void smp_threshold_interrupt(void);
|
||||
asmlinkage void smp_deferred_error_interrupt(void);
|
||||
#endif
|
||||
|
||||
extern enum ctx_state ist_enter(struct pt_regs *regs);
|
||||
|
@ -1637,10 +1637,16 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
|
||||
mce_intel_feature_init(c);
|
||||
mce_adjust_timer = cmci_intel_adjust_timer;
|
||||
break;
|
||||
case X86_VENDOR_AMD:
|
||||
|
||||
case X86_VENDOR_AMD: {
|
||||
u32 ebx = cpuid_ebx(0x80000007);
|
||||
|
||||
mce_amd_feature_init(c);
|
||||
mce_flags.overflow_recov = cpuid_ebx(0x80000007) & 0x1;
|
||||
mce_flags.overflow_recov = !!(ebx & BIT(0));
|
||||
mce_flags.succor = !!(ebx & BIT(1));
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -1,19 +1,13 @@
|
||||
/*
|
||||
* (c) 2005-2012 Advanced Micro Devices, Inc.
|
||||
* (c) 2005-2015 Advanced Micro Devices, Inc.
|
||||
* Your use of this code is subject to the terms and conditions of the
|
||||
* GNU general public license version 2. See "COPYING" or
|
||||
* http://www.gnu.org/licenses/gpl.html
|
||||
*
|
||||
* Written by Jacob Shin - AMD, Inc.
|
||||
*
|
||||
* Maintained by: Borislav Petkov <bp@alien8.de>
|
||||
*
|
||||
* April 2006
|
||||
* - added support for AMD Family 0x10 processors
|
||||
* May 2012
|
||||
* - major scrubbing
|
||||
*
|
||||
* All MC4_MISCi registers are shared between multi-cores
|
||||
* All MC4_MISCi registers are shared between cores on a node.
|
||||
*/
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/notifier.h>
|
||||
@ -32,6 +26,7 @@
|
||||
#include <asm/idle.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/trace/irq_vectors.h>
|
||||
|
||||
#define NR_BLOCKS 9
|
||||
#define THRESHOLD_MAX 0xFFF
|
||||
@ -47,6 +42,13 @@
|
||||
#define MASK_BLKPTR_LO 0xFF000000
|
||||
#define MCG_XBLK_ADDR 0xC0000400
|
||||
|
||||
/* Deferred error settings */
|
||||
#define MSR_CU_DEF_ERR 0xC0000410
|
||||
#define MASK_DEF_LVTOFF 0x000000F0
|
||||
#define MASK_DEF_INT_TYPE 0x00000006
|
||||
#define DEF_LVT_OFF 0x2
|
||||
#define DEF_INT_TYPE_APIC 0x2
|
||||
|
||||
static const char * const th_names[] = {
|
||||
"load_store",
|
||||
"insn_fetch",
|
||||
@ -60,6 +62,13 @@ static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
|
||||
static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
|
||||
|
||||
static void amd_threshold_interrupt(void);
|
||||
static void amd_deferred_error_interrupt(void);
|
||||
|
||||
/*
 * Fallback deferred error handler: it only reports, via pr_err(), that an
 * interrupt arrived on DEFERRED_ERROR_VECTOR. No MCA state is read or
 * cleared here.
 */
static void default_deferred_error_interrupt(void)
|
||||
{
|
||||
pr_err("Unexpected deferred interrupt at vector %x\n", DEFERRED_ERROR_VECTOR);
|
||||
}
|
||||
void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
|
||||
|
||||
/*
|
||||
* CPU Initialization
|
||||
@ -196,7 +205,7 @@ static void mce_threshold_block_init(struct threshold_block *b, int offset)
|
||||
threshold_restart_bank(&tr);
|
||||
};
|
||||
|
||||
static int setup_APIC_mce(int reserved, int new)
|
||||
static int setup_APIC_mce_threshold(int reserved, int new)
|
||||
{
|
||||
if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
|
||||
APIC_EILVT_MSG_FIX, 0))
|
||||
@ -205,6 +214,39 @@ static int setup_APIC_mce(int reserved, int new)
|
||||
return reserved;
|
||||
}
|
||||
|
||||
/*
 * Try to set up LVT offset @new for DEFERRED_ERROR_VECTOR.
 *
 * @reserved: offset already in use, or a negative value if none is yet.
 * @new:      offset to hand to setup_APIC_eilvt().
 *
 * setup_APIC_eilvt() is only called while @reserved is negative. Returns
 * @new when that call returns 0; otherwise returns @reserved unchanged, so
 * the caller can detect success by comparing the result against @new.
 */
static int setup_APIC_deferred_error(int reserved, int new)
|
||||
{
|
||||
if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR,
|
||||
APIC_EILVT_MSG_FIX, 0))
|
||||
return new;
|
||||
|
||||
return reserved;
|
||||
}
|
||||
|
||||
/*
 * Configure MSR_CU_DEF_ERR so that deferred errors are signalled through
 * the APIC, and install amd_deferred_error_interrupt() as the handler.
 *
 * @c: not referenced in the body.
 *
 * Steps: read the MSR (bail out if rdmsr_safe() reports a failure), take
 * the LVT offset from the MASK_DEF_LVTOFF field (falling back to
 * DEF_LVT_OFF when that field is zero), set up the APIC entry, then write
 * the MSR back with the interrupt type field set to DEF_INT_TYPE_APIC.
 */
static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 low = 0, high = 0;
|
||||
int def_offset = -1, def_new;
|
||||
|
||||
if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high))
|
||||
return;
|
||||
|
||||
/* The LVT offset lives in the MASK_DEF_LVTOFF field, shifted down by 4. */
def_new = (low & MASK_DEF_LVTOFF) >> 4;
|
||||
/* A zero field is treated as a firmware bug; use DEF_LVT_OFF instead. */
if (!(low & MASK_DEF_LVTOFF)) {
|
||||
pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n");
|
||||
def_new = DEF_LVT_OFF;
|
||||
low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4);
|
||||
}
|
||||
|
||||
/* def_offset starts at -1, so the helper always attempts the setup. */
def_offset = setup_APIC_deferred_error(def_offset, def_new);
|
||||
if ((def_offset == def_new) &&
|
||||
(deferred_error_int_vector != amd_deferred_error_interrupt))
|
||||
deferred_error_int_vector = amd_deferred_error_interrupt;
|
||||
|
||||
/*
 * NOTE(review): the MSR is written back even when the APIC setup above
 * did not succeed - confirm this is intended.
 */
low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
|
||||
wrmsr(MSR_CU_DEF_ERR, low, high);
|
||||
}
|
||||
|
||||
/* cpu init entry point, called from mce.c with preempt off */
|
||||
void mce_amd_feature_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
@ -252,7 +294,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
|
||||
|
||||
b.interrupt_enable = 1;
|
||||
new = (high & MASK_LVTOFF_HI) >> 20;
|
||||
offset = setup_APIC_mce(offset, new);
|
||||
offset = setup_APIC_mce_threshold(offset, new);
|
||||
|
||||
if ((offset == new) &&
|
||||
(mce_threshold_vector != amd_threshold_interrupt))
|
||||
@ -262,6 +304,73 @@ init:
|
||||
mce_threshold_block_init(&b, offset);
|
||||
}
|
||||
}
|
||||
|
||||
if (mce_flags.succor)
|
||||
deferred_error_interrupt_enable(c);
|
||||
}
|
||||
|
||||
/*
 * Log the error currently latched in MCA bank @bank and clear its status.
 *
 * @bank:          bank whose STATUS (and, if valid, ADDR) MSR is read.
 * @threshold_err: when true, @misc is copied into the logged record.
 * @misc:          value for the record's misc field; ignored otherwise.
 *
 * Does nothing if the bank's status does not have MCI_STATUS_VAL set.
 */
static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
|
||||
{
|
||||
struct mce m;
|
||||
u64 status;
|
||||
|
||||
rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
|
||||
if (!(status & MCI_STATUS_VAL))
|
||||
return;
|
||||
|
||||
mce_setup(&m);
|
||||
|
||||
m.status = status;
|
||||
m.bank = bank;
|
||||
|
||||
if (threshold_err)
|
||||
m.misc = misc;
|
||||
|
||||
/* Only read the address MSR when the status marks it as valid. */
if (m.status & MCI_STATUS_ADDRV)
|
||||
rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr);
|
||||
|
||||
mce_log(&m);
|
||||
/* Clear the bank's status after the record has been handed to mce_log(). */
wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
|
||||
}
|
||||
|
||||
/*
 * Common body of the deferred error interrupt entry points: bump the
 * irq_deferred_error_count statistic, then call whatever handler
 * deferred_error_int_vector currently points to.
 */
static inline void __smp_deferred_error_interrupt(void)
|
||||
{
|
||||
inc_irq_stat(irq_deferred_error_count);
|
||||
deferred_error_int_vector();
|
||||
}
|
||||
|
||||
/*
 * Non-tracing entry point for the deferred error interrupt: runs the
 * common handler body between entering_irq() and exiting_ack_irq().
 */
asmlinkage __visible void smp_deferred_error_interrupt(void)
|
||||
{
|
||||
entering_irq();
|
||||
__smp_deferred_error_interrupt();
|
||||
exiting_ack_irq();
|
||||
}
|
||||
|
||||
/*
 * Tracing entry point for the deferred error interrupt: same sequence as
 * smp_deferred_error_interrupt(), with the deferred_error_apic entry/exit
 * trace events emitted around the common handler body.
 */
asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
|
||||
{
|
||||
entering_irq();
|
||||
trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
|
||||
__smp_deferred_error_interrupt();
|
||||
trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
|
||||
exiting_ack_irq();
|
||||
}
|
||||
|
||||
/*
 * APIC interrupt handler for deferred errors.
 *
 * Scans banks 0..mca_cfg.banks-1 for the first one whose status has both
 * MCI_STATUS_VAL and MCI_STATUS_DEFERRED set, logs it via __log_error()
 * (without a misc value) and stops; any later banks are not examined in
 * this invocation.
 */
|
||||
static void amd_deferred_error_interrupt(void)
|
||||
{
|
||||
u64 status;
|
||||
unsigned int bank;
|
||||
|
||||
for (bank = 0; bank < mca_cfg.banks; ++bank) {
|
||||
rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
|
||||
|
||||
if (!(status & MCI_STATUS_VAL) ||
|
||||
!(status & MCI_STATUS_DEFERRED))
|
||||
continue;
|
||||
|
||||
__log_error(bank, false, 0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -273,12 +382,12 @@ init:
|
||||
* the interrupt goes off when error_count reaches threshold_limit.
|
||||
* the handler will simply log mcelog w/ software defined bank number.
|
||||
*/
|
||||
|
||||
static void amd_threshold_interrupt(void)
|
||||
{
|
||||
u32 low = 0, high = 0, address = 0;
|
||||
int cpu = smp_processor_id();
|
||||
unsigned int bank, block;
|
||||
struct mce m;
|
||||
|
||||
/* assume first bank caused it */
|
||||
for (bank = 0; bank < mca_cfg.banks; ++bank) {
|
||||
@ -321,15 +430,7 @@ static void amd_threshold_interrupt(void)
|
||||
return;
|
||||
|
||||
log:
|
||||
mce_setup(&m);
|
||||
rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status);
|
||||
if (!(m.status & MCI_STATUS_VAL))
|
||||
return;
|
||||
m.misc = ((u64)high << 32) | low;
|
||||
m.bank = bank;
|
||||
mce_log(&m);
|
||||
|
||||
wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
|
||||
__log_error(bank, true, ((u64)high << 32) | low);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -935,6 +935,11 @@ apicinterrupt THRESHOLD_APIC_VECTOR \
|
||||
threshold_interrupt smp_threshold_interrupt
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_MCE_AMD
|
||||
apicinterrupt DEFERRED_ERROR_VECTOR \
|
||||
deferred_error_interrupt smp_deferred_error_interrupt
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_THERMAL_VECTOR
|
||||
apicinterrupt THERMAL_APIC_VECTOR \
|
||||
thermal_interrupt smp_thermal_interrupt
|
||||
|
@ -116,6 +116,12 @@ int arch_show_interrupts(struct seq_file *p, int prec)
|
||||
seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
|
||||
seq_puts(p, " Threshold APIC interrupts\n");
|
||||
#endif
|
||||
#ifdef CONFIG_X86_MCE_AMD
|
||||
seq_printf(p, "%*s: ", prec, "DFR");
|
||||
for_each_online_cpu(j)
|
||||
seq_printf(p, "%10u ", irq_stats(j)->irq_deferred_error_count);
|
||||
seq_puts(p, " Deferred Error APIC interrupts\n");
|
||||
#endif
|
||||
#ifdef CONFIG_X86_MCE
|
||||
seq_printf(p, "%*s: ", prec, "MCE");
|
||||
for_each_online_cpu(j)
|
||||
|
@ -135,6 +135,10 @@ static void __init apic_intr_init(void)
|
||||
alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_MCE_AMD
|
||||
alloc_intr_gate(DEFERRED_ERROR_VECTOR, deferred_error_interrupt);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
/* self generated IPI for local APIC timer */
|
||||
alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
|
||||
|
@ -813,18 +813,6 @@ dotraplinkage void
|
||||
do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
conditional_sti(regs);
|
||||
#if 0
|
||||
/* No need to warn about this any longer. */
|
||||
pr_info("Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
asmlinkage __visible void __attribute__((weak)) smp_thermal_interrupt(void)
|
||||
{
|
||||
}
|
||||
|
||||
asmlinkage __visible void __attribute__((weak)) smp_threshold_interrupt(void)
|
||||
{
|
||||
}
|
||||
|
||||
/*
|
||||
|
Loading…
Reference in New Issue
Block a user