Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS updates from Ingo Molnar:
 "Main changes in this cycle were:

   - AMD MCE/RAS handling updates (Yazen Ghannam, Aravind
     Gopalakrishnan)

   - Cleanups (Borislav Petkov)

   - logging fix (Tony Luck)"

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/RAS: Add SMCA support to AMD Error Injector
  EDAC, mce_amd: Detect SMCA using X86_FEATURE_SMCA
  x86/mce: Update AMD mcheck init to use cpu_has() facilities
  x86/cpu: Add detection of AMD RAS Capabilities
  x86/mce/AMD: Save an indentation level in prepare_threshold_block()
  x86/mce/AMD: Disable LogDeferredInMcaStat for SMCA systems
  x86/mce/AMD: Log Deferred Errors using SMCA MCA_DE{STAT,ADDR} registers
  x86/mce: Detect local MCEs properly
  x86/mce: Look in genpool instead of mcelog for pending error records
  x86/mce: Detect and use SMCA-specific msr_ops
  x86/mce: Define vendor-specific MSR accessors
  x86/mce: Carve out writes to MCx_STATUS and MCx_CTL
  x86/mce: Grade uncorrected errors for SMCA-enabled systems
  x86/mce: Log MCEs after a warm reset on AMD, Fam17h and later
  x86/mce: Remove explicit smp_rmb() when starting CPUs sync
  x86/RAS: Rename AMD MCE injector config item
Linus Torvalds, 2016-05-16 14:24:51 -07:00
commit cf6ed9a668
13 changed files with 333 additions and 97 deletions


@ -27,6 +27,7 @@ enum cpuid_leafs
CPUID_6_EAX,
CPUID_8000_000A_EDX,
CPUID_7_ECX,
CPUID_8000_0007_EBX,
};
#ifdef CONFIG_X86_FEATURE_NAMES


@ -12,7 +12,7 @@
/*
* Defines x86 CPU feature bits
*/
#define NCAPINTS 17 /* N 32-bit words worth of info */
#define NCAPINTS 18 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */
/*
@ -282,6 +282,11 @@
#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */
#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */
/*
* BUG word(s)
*/


@ -104,13 +104,23 @@
#define MCE_LOG_SIGNATURE "MACHINECHECK"
/* AMD Scalable MCA */
#define MSR_AMD64_SMCA_MC0_CTL 0xc0002000
#define MSR_AMD64_SMCA_MC0_STATUS 0xc0002001
#define MSR_AMD64_SMCA_MC0_ADDR 0xc0002002
#define MSR_AMD64_SMCA_MC0_MISC0 0xc0002003
#define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004
#define MSR_AMD64_SMCA_MC0_IPID 0xc0002005
#define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008
#define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009
#define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a
#define MSR_AMD64_SMCA_MCx_CTL(x) (MSR_AMD64_SMCA_MC0_CTL + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_STATUS(x) (MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_ADDR(x) (MSR_AMD64_SMCA_MC0_ADDR + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_MISC(x) (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
/*
@ -168,9 +178,18 @@ struct mce_vendor_flags {
__reserved_0 : 61;
};
struct mca_msr_regs {
u32 (*ctl) (int bank);
u32 (*status) (int bank);
u32 (*addr) (int bank);
u32 (*misc) (int bank);
};
extern struct mce_vendor_flags mce_flags;
extern struct mca_config mca_cfg;
extern struct mca_msr_regs msr_ops;
extern void mce_register_decode_chain(struct notifier_block *nb);
extern void mce_unregister_decode_chain(struct notifier_block *nb);

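For orientation, a brief sketch of how the two pieces above fit together; it is not part of the diff, and the bank number and print statement are illustrative only. Each SMCA bank occupies a 16-MSR window starting at 0xc0002000, and generic MCE code reaches the per-bank registers only through the msr_ops table so the same path works on legacy MCA and SMCA parts.

	int bank = 2;			/* illustrative bank number */
	u64 status;

	/* Bank 2's SMCA status register resolves to 0xc0002001 + 0x10*2 = 0xc0002021. */
	pr_info("SMCA MC%d status MSR: 0x%x\n", bank, MSR_AMD64_SMCA_MCx_STATUS(bank));

	/* Generic code does not hard-code either layout; it reads through the
	 * ops table, which is switched to the SMCA helpers on SMCA CPUs. */
	rdmsrl(msr_ops.status(bank), status);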

@ -717,6 +717,13 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
}
}
if (c->extended_cpuid_level >= 0x80000007) {
cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
c->x86_capability[CPUID_8000_0007_EBX] = ebx;
c->x86_power = edx;
}
if (c->extended_cpuid_level >= 0x80000008) {
cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
@ -729,9 +736,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_phys_bits = 36;
#endif
if (c->extended_cpuid_level >= 0x80000007)
c->x86_power = cpuid_edx(0x80000007);
if (c->extended_cpuid_level >= 0x8000000a)
c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);

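With CPUID Fn8000_0007 EBX captured into word 17 of x86_capability, the new RAS feature bits can be tested with the usual cpu_has()/boot_cpu_has() helpers instead of raw cpuid_ebx() reads, which is exactly what the later hunks in this series do. A minimal sketch; the log messages are illustrative only:

	/* Somewhere after get_cpu_cap() has run for the boot CPU: */
	if (boot_cpu_has(X86_FEATURE_SMCA))
		pr_info("Scalable MCA detected\n");

	if (boot_cpu_has(X86_FEATURE_SUCCOR))
		pr_info("Uncorrectable error containment/recovery supported\n");

	if (boot_cpu_has(X86_FEATURE_OVERFLOW_RECOV))
		pr_info("MCA overflow recovery supported\n");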

@ -26,6 +26,52 @@ static struct gen_pool *mce_evt_pool;
static LLIST_HEAD(mce_event_llist);
static char gen_pool_buf[MCE_POOLSZ];
/*
* Compare the record "t" with each of the records on list "l" to see if
* an equivalent one is present in the list.
*/
static bool is_duplicate_mce_record(struct mce_evt_llist *t, struct mce_evt_llist *l)
{
struct mce_evt_llist *node;
struct mce *m1, *m2;
m1 = &t->mce;
llist_for_each_entry(node, &l->llnode, llnode) {
m2 = &node->mce;
if (!mce_cmp(m1, m2))
return true;
}
return false;
}
/*
* The system has panicked - we'd like to peruse the list of MCE records
* that have been queued, but not seen by anyone yet. The list is in
* reverse time order, so we need to reverse it. While doing that we can
* also drop duplicate records (these were logged because some banks are
* shared between cores or by all threads on a socket).
*/
struct llist_node *mce_gen_pool_prepare_records(void)
{
struct llist_node *head;
LLIST_HEAD(new_head);
struct mce_evt_llist *node, *t;
head = llist_del_all(&mce_event_llist);
if (!head)
return NULL;
/* squeeze out duplicates while reversing order */
llist_for_each_entry_safe(node, t, head, llnode) {
if (!is_duplicate_mce_record(node, t))
llist_add(&node->llnode, &new_head);
}
return new_head.first;
}
void mce_gen_pool_process(void)
{
struct llist_node *head;

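A sketch of the intended consumer of mce_gen_pool_prepare_records(); the real caller is the reworked mce_panic() further down, and print_mce() is static to mce.c, so this is illustrative only. The returned list is already time-ordered and de-duplicated, so the panic path just walks it:

	struct llist_node *pending = mce_gen_pool_prepare_records();
	struct mce_evt_llist *l;

	/* Oldest record first, duplicates from shared banks already dropped. */
	llist_for_each_entry(l, pending, llnode)
		print_mce(&l->mce);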

@ -35,6 +35,7 @@ void mce_gen_pool_process(void);
bool mce_gen_pool_empty(void);
int mce_gen_pool_add(struct mce *mce);
int mce_gen_pool_init(void);
struct llist_node *mce_gen_pool_prepare_records(void);
extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
struct dentry *mce_get_debugfs_dir(void);
@ -81,3 +82,17 @@ static inline int apei_clear_mce(u64 record_id)
#endif
void mce_inject_log(struct mce *m);
/*
* We consider records to be equivalent if bank+status+addr+misc all match.
* This is only used when the system is going down because of a fatal error
* to avoid cluttering the console log with essentially repeated information.
* In normal processing all errors seen are logged.
*/
static inline bool mce_cmp(struct mce *m1, struct mce *m2)
{
return m1->bank != m2->bank ||
m1->status != m2->status ||
m1->addr != m2->addr ||
m1->misc != m2->misc;
}


@ -204,6 +204,33 @@ static int error_context(struct mce *m)
return IN_KERNEL;
}
static int mce_severity_amd_smca(struct mce *m, int err_ctx)
{
u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
u32 low, high;
/*
* We need to look at the following bits:
* - "succor" bit (data poisoning support), and
* - TCC bit (Task Context Corrupt)
* in MCi_STATUS to determine error severity.
*/
if (!mce_flags.succor)
return MCE_PANIC_SEVERITY;
if (rdmsr_safe(addr, &low, &high))
return MCE_PANIC_SEVERITY;
/* TCC (Task context corrupt). If set and if IN_KERNEL, panic. */
if ((low & MCI_CONFIG_MCAX) &&
(m->status & MCI_STATUS_TCC) &&
(err_ctx == IN_KERNEL))
return MCE_PANIC_SEVERITY;
/* ...otherwise invoke hwpoison handler. */
return MCE_AR_SEVERITY;
}
/*
* See AMD Error Scope Hierarchy table in a newer BKDG. For example
* 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
@ -225,6 +252,9 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc
* to at least kill process to prolong system operation.
*/
if (mce_flags.overflow_recov) {
if (mce_flags.smca)
return mce_severity_amd_smca(m, ctx);
/* software can try to contain */
if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL))
return MCE_PANIC_SEVERITY;

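A hedged walk-through of the new grading; the struct literal below is illustrative and not taken from the patch. On an SMCA system with SUCCOR, an uncorrected error in a bank with MCA_CONFIG[MCAX] set is fatal only if TCC is set and the error hit kernel context; otherwise it is handed to the hwpoison machinery.

	/* Illustrative example record: */
	struct mce m = {
		.bank   = 0,
		.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_TCC,
	};

	/*
	 * With mce_flags.succor set and MCAX enabled in the bank's MCA_CONFIG:
	 *   mce_severity_amd_smca(&m, IN_KERNEL) -> MCE_PANIC_SEVERITY
	 *   mce_severity_amd_smca(&m, IN_USER)   -> MCE_AR_SEVERITY (hwpoison)
	 * Without SUCCOR, the error is graded MCE_PANIC_SEVERITY in either context.
	 */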

@ -161,7 +161,6 @@ void mce_log(struct mce *mce)
if (!mce_gen_pool_add(mce))
irq_work_queue(&mce_irq_work);
mce->finished = 0;
wmb();
for (;;) {
entry = mce_log_get_idx_check(mcelog.next);
@ -194,7 +193,6 @@ void mce_log(struct mce *mce)
mcelog.entry[entry].finished = 1;
wmb();
mce->finished = 1;
set_bit(0, &mce_need_notify);
}
@ -224,6 +222,53 @@ void mce_unregister_decode_chain(struct notifier_block *nb)
}
EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
static inline u32 ctl_reg(int bank)
{
return MSR_IA32_MCx_CTL(bank);
}
static inline u32 status_reg(int bank)
{
return MSR_IA32_MCx_STATUS(bank);
}
static inline u32 addr_reg(int bank)
{
return MSR_IA32_MCx_ADDR(bank);
}
static inline u32 misc_reg(int bank)
{
return MSR_IA32_MCx_MISC(bank);
}
static inline u32 smca_ctl_reg(int bank)
{
return MSR_AMD64_SMCA_MCx_CTL(bank);
}
static inline u32 smca_status_reg(int bank)
{
return MSR_AMD64_SMCA_MCx_STATUS(bank);
}
static inline u32 smca_addr_reg(int bank)
{
return MSR_AMD64_SMCA_MCx_ADDR(bank);
}
static inline u32 smca_misc_reg(int bank)
{
return MSR_AMD64_SMCA_MCx_MISC(bank);
}
struct mca_msr_regs msr_ops = {
.ctl = ctl_reg,
.status = status_reg,
.addr = addr_reg,
.misc = misc_reg
};
static void print_mce(struct mce *m)
{
int ret = 0;
@ -290,7 +335,9 @@ static void wait_for_panic(void)
static void mce_panic(const char *msg, struct mce *final, char *exp)
{
int i, apei_err = 0;
int apei_err = 0;
struct llist_node *pending;
struct mce_evt_llist *l;
if (!fake_panic) {
/*
@ -307,11 +354,10 @@ static void mce_panic(const char *msg, struct mce *final, char *exp)
if (atomic_inc_return(&mce_fake_panicked) > 1)
return;
}
pending = mce_gen_pool_prepare_records();
/* First print corrected ones that are still unlogged */
for (i = 0; i < MCE_LOG_LEN; i++) {
struct mce *m = &mcelog.entry[i];
if (!(m->status & MCI_STATUS_VAL))
continue;
llist_for_each_entry(l, pending, llnode) {
struct mce *m = &l->mce;
if (!(m->status & MCI_STATUS_UC)) {
print_mce(m);
if (!apei_err)
@ -319,13 +365,11 @@ static void mce_panic(const char *msg, struct mce *final, char *exp)
}
}
/* Now print uncorrected but with the final one last */
for (i = 0; i < MCE_LOG_LEN; i++) {
struct mce *m = &mcelog.entry[i];
if (!(m->status & MCI_STATUS_VAL))
continue;
llist_for_each_entry(l, pending, llnode) {
struct mce *m = &l->mce;
if (!(m->status & MCI_STATUS_UC))
continue;
if (!final || memcmp(m, final, sizeof(struct mce))) {
if (!final || mce_cmp(m, final)) {
print_mce(m);
if (!apei_err)
apei_err = apei_write_mce(m);
@ -356,11 +400,11 @@ static int msr_to_offset(u32 msr)
if (msr == mca_cfg.rip_msr)
return offsetof(struct mce, ip);
if (msr == MSR_IA32_MCx_STATUS(bank))
if (msr == msr_ops.status(bank))
return offsetof(struct mce, status);
if (msr == MSR_IA32_MCx_ADDR(bank))
if (msr == msr_ops.addr(bank))
return offsetof(struct mce, addr);
if (msr == MSR_IA32_MCx_MISC(bank))
if (msr == msr_ops.misc(bank))
return offsetof(struct mce, misc);
if (msr == MSR_IA32_MCG_STATUS)
return offsetof(struct mce, mcgstatus);
@ -523,9 +567,9 @@ static struct notifier_block mce_srao_nb = {
static void mce_read_aux(struct mce *m, int i)
{
if (m->status & MCI_STATUS_MISCV)
m->misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
m->misc = mce_rdmsrl(msr_ops.misc(i));
if (m->status & MCI_STATUS_ADDRV) {
m->addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
m->addr = mce_rdmsrl(msr_ops.addr(i));
/*
* Mask the reported address by the reported granularity.
@ -607,7 +651,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
m.tsc = 0;
barrier();
m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
m.status = mce_rdmsrl(msr_ops.status(i));
if (!(m.status & MCI_STATUS_VAL))
continue;
@ -654,7 +698,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
/*
* Clear state for this bank.
*/
mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
mce_wrmsrl(msr_ops.status(i), 0);
}
/*
@ -679,7 +723,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
char *tmp;
for (i = 0; i < mca_cfg.banks; i++) {
m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
m->status = mce_rdmsrl(msr_ops.status(i));
if (m->status & MCI_STATUS_VAL) {
__set_bit(i, validp);
if (quirk_no_way_out)
@ -830,9 +874,9 @@ static int mce_start(int *no_way_out)
atomic_add(*no_way_out, &global_nwo);
/*
* global_nwo should be updated before mce_callin
* Rely on the implied barrier below, such that global_nwo
* is updated before mce_callin.
*/
smp_wmb();
order = atomic_inc_return(&mce_callin);
/*
@ -957,7 +1001,7 @@ static void mce_clear_state(unsigned long *toclear)
for (i = 0; i < mca_cfg.banks; i++) {
if (test_bit(i, toclear))
mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
mce_wrmsrl(msr_ops.status(i), 0);
}
}
@ -994,11 +1038,12 @@ void do_machine_check(struct pt_regs *regs, long error_code)
int i;
int worst = 0;
int severity;
/*
* Establish sequential order between the CPUs entering the machine
* check handler.
*/
int order;
int order = -1;
/*
* If no_way_out gets set, there is no safe way to recover from this
* MCE. If mca_cfg.tolerant is cranked up, we'll try anyway.
@ -1012,7 +1057,12 @@ void do_machine_check(struct pt_regs *regs, long error_code)
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
char *msg = "Unknown";
int lmce = 0;
/*
* MCEs are always local on AMD. Same is determined by MCG_STATUS_LMCES
* on Intel.
*/
int lmce = 1;
/* If this CPU is offline, just bail out. */
if (cpu_is_offline(smp_processor_id())) {
@ -1051,19 +1101,20 @@ void do_machine_check(struct pt_regs *regs, long error_code)
kill_it = 1;
/*
* Check if this MCE is signaled to only this logical processor
* Check if this MCE is signaled to only this logical processor,
* on Intel only.
*/
if (m.mcgstatus & MCG_STATUS_LMCES)
lmce = 1;
else {
if (m.cpuvendor == X86_VENDOR_INTEL)
lmce = m.mcgstatus & MCG_STATUS_LMCES;
/*
* Go through all the banks in exclusion of the other CPUs.
* This way we don't report duplicated events on shared banks
* because the first one to see it will clear it.
* If this is a Local MCE, then no need to perform rendezvous.
* Go through all banks in exclusion of the other CPUs. This way we
* don't report duplicated events on shared banks because the first one
* to see it will clear it. If this is a Local MCE, then no need to
* perform rendezvous.
*/
if (!lmce)
order = mce_start(&no_way_out);
}
for (i = 0; i < cfg->banks; i++) {
__clear_bit(i, toclear);
@ -1076,7 +1127,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
m.addr = 0;
m.bank = i;
m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
m.status = mce_rdmsrl(msr_ops.status(i));
if ((m.status & MCI_STATUS_VAL) == 0)
continue;
@ -1420,7 +1471,6 @@ static void __mcheck_cpu_init_generic(void)
enum mcp_flags m_fl = 0;
mce_banks_t all_banks;
u64 cap;
int i;
if (!mca_cfg.bootlog)
m_fl = MCP_DONTLOG;
@ -1436,14 +1486,19 @@ static void __mcheck_cpu_init_generic(void)
rdmsrl(MSR_IA32_MCG_CAP, cap);
if (cap & MCG_CTL_P)
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
}
static void __mcheck_cpu_init_clear_banks(void)
{
int i;
for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];
if (!b->init)
continue;
wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
wrmsrl(msr_ops.ctl(i), b->ctl);
wrmsrl(msr_ops.status(i), 0);
}
}
@ -1495,7 +1550,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
*/
clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
}
if (c->x86 <= 17 && cfg->bootlog < 0) {
if (c->x86 < 0x17 && cfg->bootlog < 0) {
/*
* Lots of broken BIOS around that don't clear them
* by default and leave crap in there. Don't log:
@ -1628,11 +1683,19 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
break;
case X86_VENDOR_AMD: {
u32 ebx = cpuid_ebx(0x80000007);
mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV);
mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR);
mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA);
mce_flags.overflow_recov = !!(ebx & BIT(0));
mce_flags.succor = !!(ebx & BIT(1));
mce_flags.smca = !!(ebx & BIT(3));
/*
* Install proper ops for Scalable MCA enabled processors
*/
if (mce_flags.smca) {
msr_ops.ctl = smca_ctl_reg;
msr_ops.status = smca_status_reg;
msr_ops.addr = smca_addr_reg;
msr_ops.misc = smca_misc_reg;
}
mce_amd_feature_init(c);
break;
@ -1717,6 +1780,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(c);
__mcheck_cpu_init_clear_banks();
__mcheck_cpu_init_timer();
}
@ -2082,7 +2146,7 @@ static void mce_disable_error_reporting(void)
struct mce_bank *b = &mce_banks[i];
if (b->init)
wrmsrl(MSR_IA32_MCx_CTL(i), 0);
wrmsrl(msr_ops.ctl(i), 0);
}
return;
}
@ -2121,6 +2185,7 @@ static void mce_syscore_resume(void)
{
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info));
__mcheck_cpu_init_clear_banks();
}
static struct syscore_ops mce_syscore_ops = {
@ -2138,6 +2203,7 @@ static void mce_cpu_restart(void *data)
if (!mce_available(raw_cpu_ptr(&cpu_info)))
return;
__mcheck_cpu_init_generic();
__mcheck_cpu_init_clear_banks();
__mcheck_cpu_init_timer();
}
@ -2413,7 +2479,7 @@ static void mce_reenable_cpu(void *h)
struct mce_bank *b = &mce_banks[i];
if (b->init)
wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
wrmsrl(msr_ops.ctl(i), b->ctl);
}
}


@ -54,14 +54,6 @@
/* Threshold LVT offset is at MSR0xC0000410[15:12] */
#define SMCA_THR_LVT_OFF 0xF000
/*
* OS is required to set the MCAX bit to acknowledge that it is now using the
* new MSR ranges and new registers under each bank. It also means that the OS
* will configure deferred errors in the new MCx_CONFIG register. If the bit is
* not set, uncorrectable errors will cause a system panic.
*/
#define SMCA_MCAX_EN_OFF 0x1
static const char * const th_names[] = {
"load_store",
"insn_fetch",
@ -333,7 +325,7 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high,
/* Fall back to method we used for older processors: */
switch (block) {
case 0:
addr = MSR_IA32_MCx_MISC(bank);
addr = msr_ops.misc(bank);
break;
case 1:
offset = ((low & MASK_BLKPTR_LO) >> 21);
@ -351,6 +343,7 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
int offset, u32 misc_high)
{
unsigned int cpu = smp_processor_id();
u32 smca_low, smca_high, smca_addr;
struct threshold_block b;
int new;
@ -369,12 +362,39 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
b.interrupt_enable = 1;
if (mce_flags.smca) {
u32 smca_low, smca_high;
u32 smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
if (!mce_flags.smca) {
new = (misc_high & MASK_LVTOFF_HI) >> 20;
goto set_offset;
}
smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
smca_high |= SMCA_MCAX_EN_OFF;
/*
* OS is required to set the MCAX bit to acknowledge that it is
* now using the new MSR ranges and new registers under each
* bank. It also means that the OS will configure deferred
* errors in the new MCx_CONFIG register. If the bit is not set,
* uncorrectable errors will cause a system panic.
*
* MCA_CONFIG[MCAX] is bit 32 (0 in the high portion of the MSR.)
*/
smca_high |= BIT(0);
/*
* SMCA logs Deferred Error information in MCA_DE{STAT,ADDR}
* registers with the option of additionally logging to
* MCA_{STATUS,ADDR} if MCA_CONFIG[LogDeferredInMcaStat] is set.
*
* This bit is usually set by BIOS to retain the old behavior
* for OSes that don't use the new registers. Linux supports the
* new registers so let's disable that additional logging here.
*
* MCA_CONFIG[LogDeferredInMcaStat] is bit 34 (bit 2 in the high
* portion of the MSR).
*/
smca_high &= ~BIT(2);
wrmsr(smca_addr, smca_low, smca_high);
}
@ -383,10 +403,8 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
goto out;
new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
} else {
new = (misc_high & MASK_LVTOFF_HI) >> 20;
}
set_offset:
offset = setup_APIC_mce_threshold(offset, new);
if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
@ -430,12 +448,23 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
deferred_error_interrupt_enable(c);
}
static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
static void
__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
{
u32 msr_status = msr_ops.status(bank);
u32 msr_addr = msr_ops.addr(bank);
struct mce m;
u64 status;
rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
WARN_ON_ONCE(deferred_err && threshold_err);
if (deferred_err && mce_flags.smca) {
msr_status = MSR_AMD64_SMCA_MCx_DESTAT(bank);
msr_addr = MSR_AMD64_SMCA_MCx_DEADDR(bank);
}
rdmsrl(msr_status, status);
if (!(status & MCI_STATUS_VAL))
return;
@ -448,10 +477,11 @@ static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
m.misc = misc;
if (m.status & MCI_STATUS_ADDRV)
rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr);
rdmsrl(msr_addr, m.addr);
mce_log(&m);
wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
wrmsrl(msr_status, 0);
}
static inline void __smp_deferred_error_interrupt(void)
@ -479,17 +509,21 @@ asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
/* APIC interrupt handler for deferred errors */
static void amd_deferred_error_interrupt(void)
{
u64 status;
unsigned int bank;
u32 msr_status;
u64 status;
for (bank = 0; bank < mca_cfg.banks; ++bank) {
rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
msr_status = (mce_flags.smca) ? MSR_AMD64_SMCA_MCx_DESTAT(bank)
: msr_ops.status(bank);
rdmsrl(msr_status, status);
if (!(status & MCI_STATUS_VAL) ||
!(status & MCI_STATUS_DEFERRED))
continue;
__log_error(bank, false, 0);
__log_error(bank, true, false, 0);
break;
}
}
@ -544,7 +578,7 @@ static void amd_threshold_interrupt(void)
return;
log:
__log_error(bank, true, ((u64)high << 32) | low);
__log_error(bank, false, true, ((u64)high << 32) | low);
}
/*

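Putting the deferred-error pieces together, a short sketch that mirrors the hunks above rather than adding anything new: on SMCA parts the deferred-error state lives in MCA_DE{STAT,ADDR}, while non-SMCA parts keep using the regular status register. The bank number is illustrative and __log_error() is static to this file.

	unsigned int bank = 0;		/* illustrative bank */
	u32 msr_status = mce_flags.smca ? MSR_AMD64_SMCA_MCx_DESTAT(bank)
					: msr_ops.status(bank);
	u64 status;

	rdmsrl(msr_status, status);
	if ((status & MCI_STATUS_VAL) && (status & MCI_STATUS_DEFERRED))
		__log_error(bank, true, false, 0);	/* deferred, not threshold */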

@ -1,4 +1,4 @@
config AMD_MCE_INJ
config MCE_AMD_INJ
tristate "Simple MCE injection interface for AMD processors"
depends on RAS && EDAC_DECODE_MCE && DEBUG_FS && AMD_NB
default n


@ -1,2 +1,2 @@
obj-$(CONFIG_AMD_MCE_INJ) += mce_amd_inj.o
obj-$(CONFIG_MCE_AMD_INJ) += mce_amd_inj.o


@ -290,6 +290,24 @@ static void do_inject(void)
wrmsr_on_cpu(cpu, MSR_IA32_MCG_STATUS,
(u32)mcg_status, (u32)(mcg_status >> 32));
if (boot_cpu_has(X86_FEATURE_SMCA)) {
if (inj_type == DFR_INT_INJ) {
wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DESTAT(b),
(u32)i_mce.status, (u32)(i_mce.status >> 32));
wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DEADDR(b),
(u32)i_mce.addr, (u32)(i_mce.addr >> 32));
} else {
wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_STATUS(b),
(u32)i_mce.status, (u32)(i_mce.status >> 32));
wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_ADDR(b),
(u32)i_mce.addr, (u32)(i_mce.addr >> 32));
}
wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(b),
(u32)i_mce.misc, (u32)(i_mce.misc >> 32));
} else {
wrmsr_on_cpu(cpu, MSR_IA32_MCx_STATUS(b),
(u32)i_mce.status, (u32)(i_mce.status >> 32));
@ -298,6 +316,7 @@ static void do_inject(void)
wrmsr_on_cpu(cpu, MSR_IA32_MCx_MISC(b),
(u32)i_mce.misc, (u32)(i_mce.misc >> 32));
}
toggle_hw_mce_inject(cpu, false);


@ -1052,7 +1052,6 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
struct mce *m = (struct mce *)data;
struct cpuinfo_x86 *c = &cpu_data(m->extcpu);
int ecc;
u32 ebx = cpuid_ebx(0x80000007);
if (amd_filter_mce(m))
return NOTIFY_STOP;
@ -1075,7 +1074,7 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
((m->status & MCI_STATUS_POISON) ? "Poison" : "-"));
if (!!(ebx & BIT(3))) {
if (boot_cpu_has(X86_FEATURE_SMCA)) {
u32 low, high;
u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
@ -1094,7 +1093,7 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
if (m->status & MCI_STATUS_ADDRV)
pr_emerg(HW_ERR "MC%d Error Address: 0x%016llx\n", m->bank, m->addr);
if (!!(ebx & BIT(3))) {
if (boot_cpu_has(X86_FEATURE_SMCA)) {
decode_smca_errors(m);
goto err_code;
}
@ -1149,7 +1148,6 @@ static struct notifier_block amd_mce_dec_nb = {
static int __init mce_amd_init(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
u32 ebx;
if (c->x86_vendor != X86_VENDOR_AMD)
return -ENODEV;
@ -1205,9 +1203,8 @@ static int __init mce_amd_init(void)
break;
case 0x17:
ebx = cpuid_ebx(0x80000007);
xec_mask = 0x3f;
if (!(ebx & BIT(3))) {
if (!boot_cpu_has(X86_FEATURE_SMCA)) {
printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n");
goto err_out;
}