Merge tag 'ras_core_for_v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS updates from Borislav Petkov:

 - Log and handle two new AMD-specific MCA registers: SYND1 and SYND2,
   and report the Field Replaceable Unit text info reported through them

 - Add support for handling variable-sized SMCA BERT records

 - Add the capability for reporting vendor-specific RAS error info
   without adding vendor-specific fields to struct mce

 - Cleanups

* tag 'ras_core_for_v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  EDAC/mce_amd: Add support for FRU text in MCA
  x86/mce/apei: Handle variable SMCA BERT record size
  x86/MCE/AMD: Add support for new MCA_SYND{1,2} registers
  tracing: Add __print_dynamic_array() helper
  x86/mce: Add wrapper for struct mce to export vendor specific info
  x86/mce/intel: Use MCG_BANKCNT_MASK instead of 0xff
  x86/mce/mcelog: Use xchg() to get and clear the flags
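
The wrapper approach in this pull is easy to demonstrate outside the kernel. Below is a minimal standalone C sketch (not kernel code; plain stdint types stand in for the kernel's) of how vendor-specific data can ride alongside an ABI-stable record without changing that record's layout:

    /*
     * Standalone sketch: struct mce keeps its userspace-visible layout,
     * and vendor-only data rides alongside it in a wrapping struct.
     */
    #include <stdint.h>
    #include <stdio.h>

    struct mce {                         /* shared, ABI-stable record (abridged) */
            uint64_t status;
            uint64_t synd;
    };

    struct mce_hw_err {                  /* kernel-internal wrapper */
            struct mce m;
            union {
                    struct {
                            uint64_t synd1;   /* MCA_SYND1 */
                            uint64_t synd2;   /* MCA_SYND2 */
                    } amd;
            } vendor;
    };

    int main(void)
    {
            struct mce_hw_err err = { .m.synd = 0x1, .vendor.amd.synd1 = 0x2 };

            /* consumers that only know struct mce keep working unchanged */
            printf("synd=%llx synd1=%llx\n",
                   (unsigned long long)err.m.synd,
                   (unsigned long long)err.vendor.amd.synd1);
            return 0;
    }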
commit c1f2ffe207
Author: Linus Torvalds
Date:   2024-11-19 12:04:51 -08:00

15 changed files with 329 additions and 191 deletions

--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -61,6 +61,7 @@
  * - TCC bit is present in MCx_STATUS.
  */
 #define MCI_CONFIG_MCAX         0x1
+#define MCI_CONFIG_FRUTEXT      BIT_ULL(9)
 #define MCI_IPID_MCATYPE        0xFFFF0000
 #define MCI_IPID_HWID           0xFFF
@@ -122,6 +123,9 @@
 #define MSR_AMD64_SMCA_MC0_DESTAT       0xc0002008
 #define MSR_AMD64_SMCA_MC0_DEADDR       0xc0002009
 #define MSR_AMD64_SMCA_MC0_MISC1        0xc000200a
+/* Registers MISC2 to MISC4 are at offsets B to D. */
+#define MSR_AMD64_SMCA_MC0_SYND1        0xc000200e
+#define MSR_AMD64_SMCA_MC0_SYND2        0xc000200f
 #define MSR_AMD64_SMCA_MCx_CTL(x)       (MSR_AMD64_SMCA_MC0_CTL + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_STATUS(x)    (MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_ADDR(x)      (MSR_AMD64_SMCA_MC0_ADDR + 0x10*(x))
@@ -132,6 +136,8 @@
 #define MSR_AMD64_SMCA_MCx_DESTAT(x)    (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_DEADDR(x)    (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_MISCy(x, y)  ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
+#define MSR_AMD64_SMCA_MCx_SYND1(x)     (MSR_AMD64_SMCA_MC0_SYND1 + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_SYND2(x)     (MSR_AMD64_SMCA_MC0_SYND2 + 0x10*(x))
 
 #define XEC(x, mask)                    (((x) >> 16) & mask)
@@ -187,6 +193,32 @@ enum mce_notifier_prios {
         MCE_PRIO_HIGHEST = MCE_PRIO_CEC
 };
 
+/**
+ * struct mce_hw_err - Hardware Error Record.
+ * @m:          Machine Check record.
+ * @vendor:     Vendor-specific error information.
+ *
+ * Vendor-specific fields should not be added to struct mce. Instead, vendors
+ * should export their vendor-specific data through their structure in the
+ * vendor union below.
+ *
+ * AMD's vendor data is parsed by error decoding tools for supplemental error
+ * information. Thus, current offsets of existing fields must be maintained.
+ * Only add new fields at the end of AMD's vendor structure.
+ */
+struct mce_hw_err {
+        struct mce m;
+
+        union vendor_info {
+                struct {
+                        u64 synd1; /* MCA_SYND1 MSR */
+                        u64 synd2; /* MCA_SYND2 MSR */
+                } amd;
+        } vendor;
+};
+
+#define to_mce_hw_err(mce) container_of(mce, struct mce_hw_err, m)
+
 struct notifier_block;
 extern void mce_register_decode_chain(struct notifier_block *nb);
 extern void mce_unregister_decode_chain(struct notifier_block *nb);
@@ -221,8 +253,8 @@ static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
                                              u64 lapic_id) { return -EINVAL; }
 #endif
 
-void mce_prep_record(struct mce *m);
-void mce_log(struct mce *m);
+void mce_prep_record(struct mce_hw_err *err);
+void mce_log(struct mce_hw_err *err);
 
 DECLARE_PER_CPU(struct device *, mce_device);
 
 /* Maximum number of MCA banks per CPU. */
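
The to_mce_hw_err() macro above is the standard container_of() pattern: given a pointer to the embedded struct mce member, it recovers the enclosing wrapper. A standalone userspace approximation of that mechanism (hypothetical helper name get_synd1()):

    #include <stddef.h>
    #include <stdint.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct mce { uint64_t status; };

    struct mce_hw_err {
            struct mce m;
            struct { uint64_t synd1, synd2; } vendor_amd;
    };

    static uint64_t get_synd1(struct mce *m)
    {
            /* recover the enclosing wrapper from a pointer to its member */
            return container_of(m, struct mce_hw_err, m)->vendor_amd.synd1;
    }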

--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -8,7 +8,8 @@
 /*
  * Fields are zero when not available. Also, this struct is shared with
  * userspace mcelog and thus must keep existing fields at current offsets.
- * Only add new fields to the end of the structure
+ * Only add new, shared fields to the end of the structure.
+ * Do not add vendor-specific fields.
  */
 struct mce {
         __u64 status;           /* Bank's MCi_STATUS MSR */

--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -778,29 +778,33 @@ bool amd_mce_usable_address(struct mce *m)
 
 static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
 {
-        struct mce m;
+        struct mce_hw_err err;
+        struct mce *m = &err.m;
 
-        mce_prep_record(&m);
+        mce_prep_record(&err);
 
-        m.status = status;
-        m.misc   = misc;
-        m.bank   = bank;
-        m.tsc    = rdtsc();
+        m->status = status;
+        m->misc   = misc;
+        m->bank   = bank;
+        m->tsc    = rdtsc();
 
-        if (m.status & MCI_STATUS_ADDRV) {
-                m.addr = addr;
+        if (m->status & MCI_STATUS_ADDRV) {
+                m->addr = addr;
 
-                smca_extract_err_addr(&m);
+                smca_extract_err_addr(m);
         }
 
         if (mce_flags.smca) {
-                rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m.ipid);
+                rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m->ipid);
 
-                if (m.status & MCI_STATUS_SYNDV)
-                        rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd);
+                if (m->status & MCI_STATUS_SYNDV) {
+                        rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m->synd);
+                        rdmsrl(MSR_AMD64_SMCA_MCx_SYND1(bank), err.vendor.amd.synd1);
+                        rdmsrl(MSR_AMD64_SMCA_MCx_SYND2(bank), err.vendor.amd.synd2);
+                }
         }
 
-        mce_log(&m);
+        mce_log(&err);
 }
 
 DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)

--- a/arch/x86/kernel/cpu/mce/apei.c
+++ b/arch/x86/kernel/cpu/mce/apei.c
@@ -28,7 +28,8 @@
 
 void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
 {
-        struct mce m;
+        struct mce_hw_err err;
+        struct mce *m;
         int lsb;
 
         if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
@@ -44,31 +45,33 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
         else
                 lsb = PAGE_SHIFT;
 
-        mce_prep_record(&m);
-        m.bank = -1;
+        mce_prep_record(&err);
+        m = &err.m;
+        m->bank = -1;
         /* Fake a memory read error with unknown channel */
-        m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f;
-        m.misc = (MCI_MISC_ADDR_PHYS << 6) | lsb;
+        m->status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f;
+        m->misc = (MCI_MISC_ADDR_PHYS << 6) | lsb;
 
         if (severity >= GHES_SEV_RECOVERABLE)
-                m.status |= MCI_STATUS_UC;
+                m->status |= MCI_STATUS_UC;
 
         if (severity >= GHES_SEV_PANIC) {
-                m.status |= MCI_STATUS_PCC;
-                m.tsc = rdtsc();
+                m->status |= MCI_STATUS_PCC;
+                m->tsc = rdtsc();
         }
 
-        m.addr = mem_err->physical_addr;
-        mce_log(&m);
+        m->addr = mem_err->physical_addr;
+        mce_log(&err);
 }
 EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
 
 int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
 {
         const u64 *i_mce = ((const u64 *) (ctx_info + 1));
+        unsigned int cpu, num_regs;
         bool apicid_found = false;
-        unsigned int cpu;
-        struct mce m;
+        struct mce_hw_err err;
+        struct mce *m;
 
         if (!boot_cpu_has(X86_FEATURE_SMCA))
                 return -EINVAL;
@@ -86,16 +89,12 @@ int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
                 return -EINVAL;
 
         /*
-         * The register array size must be large enough to include all the
-         * SMCA registers which need to be extracted.
-         *
          * The number of registers in the register array is determined by
         * Register Array Size/8 as defined in UEFI spec v2.8, sec N.2.4.2.2.
-         * The register layout is fixed and currently the raw data in the
-         * register array includes 6 SMCA registers which the kernel can
-         * extract.
+         * Sanity-check registers array size.
         */
-        if (ctx_info->reg_arr_size < 48)
+        num_regs = ctx_info->reg_arr_size >> 3;
+        if (!num_regs)
                 return -EINVAL;
 
         for_each_possible_cpu(cpu) {
@@ -108,18 +107,68 @@ int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
         if (!apicid_found)
                 return -EINVAL;
 
-        mce_prep_record_common(&m);
-        mce_prep_record_per_cpu(cpu, &m);
+        m = &err.m;
+        memset(&err, 0, sizeof(struct mce_hw_err));
+        mce_prep_record_common(m);
+        mce_prep_record_per_cpu(cpu, m);
 
-        m.bank = (ctx_info->msr_addr >> 4) & 0xFF;
-        m.status = *i_mce;
-        m.addr = *(i_mce + 1);
-        m.misc = *(i_mce + 2);
-        /* Skipping MCA_CONFIG */
-        m.ipid = *(i_mce + 4);
-        m.synd = *(i_mce + 5);
+        m->bank = (ctx_info->msr_addr >> 4) & 0xFF;
 
-        mce_log(&m);
+        /*
+         * The SMCA register layout is fixed and includes 16 registers.
+         * The end of the array may be variable, but the beginning is known.
+         * Cap the number of registers to expected max (15).
+         */
+        if (num_regs > 15)
+                num_regs = 15;
+
+        switch (num_regs) {
+        /* MCA_SYND2 */
+        case 15:
+                err.vendor.amd.synd2 = *(i_mce + 14);
+                fallthrough;
+        /* MCA_SYND1 */
+        case 14:
+                err.vendor.amd.synd1 = *(i_mce + 13);
+                fallthrough;
+        /* MCA_MISC4 */
+        case 13:
+        /* MCA_MISC3 */
+        case 12:
+        /* MCA_MISC2 */
+        case 11:
+        /* MCA_MISC1 */
+        case 10:
+        /* MCA_DEADDR */
+        case 9:
+        /* MCA_DESTAT */
+        case 8:
+        /* reserved */
+        case 7:
+        /* MCA_SYND */
+        case 6:
+                m->synd = *(i_mce + 5);
+                fallthrough;
+        /* MCA_IPID */
+        case 5:
+                m->ipid = *(i_mce + 4);
+                fallthrough;
+        /* MCA_CONFIG */
+        case 4:
+        /* MCA_MISC0 */
+        case 3:
+                m->misc = *(i_mce + 2);
+                fallthrough;
+        /* MCA_ADDR */
+        case 2:
+                m->addr = *(i_mce + 1);
+                fallthrough;
+        /* MCA_STATUS */
+        case 1:
+                m->status = *i_mce;
+        }
+
+        mce_log(&err);
 
         return 0;
 }
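
The descending switch with fallthrough above is a general technique for consuming variable-length records whose leading fields are fixed: start at the highest field actually present and fall through to copy everything below it. A minimal standalone sketch with a hypothetical three-field record:

    #include <stdint.h>
    #include <string.h>

    struct rec { uint64_t a, b, c; };    /* fields in fixed order */

    static void parse(struct rec *r, const uint64_t *raw, unsigned int n)
    {
            memset(r, 0, sizeof(*r));    /* absent fields stay zero */
            switch (n) {                 /* n = fields the record carries */
            default:                     /* cap: ignore unknown trailing fields */
            case 3:
                    r->c = raw[2];
                    /* fall through */
            case 2:
                    r->b = raw[1];
                    /* fall through */
            case 1:
                    r->a = raw[0];
            case 0:
                    break;
            }
    }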

--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -88,7 +88,7 @@ struct mca_config mca_cfg __read_mostly = {
         .monarch_timeout = -1
 };
 
-static DEFINE_PER_CPU(struct mce, mces_seen);
+static DEFINE_PER_CPU(struct mce_hw_err, hw_errs_seen);
 static unsigned long mce_need_notify;
 
 /*
@@ -119,8 +119,6 @@ BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain);
 
 void mce_prep_record_common(struct mce *m)
 {
-        memset(m, 0, sizeof(struct mce));
-
         m->cpuid     = cpuid_eax(1);
         m->cpuvendor = boot_cpu_data.x86_vendor;
         m->mcgcap    = __rdmsr(MSR_IA32_MCG_CAP);
@@ -138,9 +136,12 @@ void mce_prep_record_per_cpu(unsigned int cpu, struct mce *m)
         m->socketid  = topology_physical_package_id(cpu);
 }
 
-/* Do initial initialization of a struct mce */
-void mce_prep_record(struct mce *m)
+/* Do initial initialization of struct mce_hw_err */
+void mce_prep_record(struct mce_hw_err *err)
 {
+        struct mce *m = &err->m;
+
+        memset(err, 0, sizeof(struct mce_hw_err));
         mce_prep_record_common(m);
         mce_prep_record_per_cpu(smp_processor_id(), m);
 }
@@ -148,9 +149,9 @@ void mce_prep_record(struct mce *m)
 DEFINE_PER_CPU(struct mce, injectm);
 EXPORT_PER_CPU_SYMBOL_GPL(injectm);
 
-void mce_log(struct mce *m)
+void mce_log(struct mce_hw_err *err)
 {
-        if (!mce_gen_pool_add(m))
+        if (!mce_gen_pool_add(err))
                 irq_work_queue(&mce_irq_work);
 }
 EXPORT_SYMBOL_GPL(mce_log);
@@ -171,8 +172,10 @@ void mce_unregister_decode_chain(struct notifier_block *nb)
 }
 EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
 
-static void __print_mce(struct mce *m)
+static void __print_mce(struct mce_hw_err *err)
 {
+        struct mce *m = &err->m;
+
         pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n",
                  m->extcpu,
                  (m->mcgstatus & MCG_STATUS_MCIP ? " Exception" : ""),
@@ -199,6 +202,10 @@ static void __print_mce(struct mce *m)
         if (mce_flags.smca) {
                 if (m->synd)
                         pr_cont("SYND %llx ", m->synd);
+                if (err->vendor.amd.synd1)
+                        pr_cont("SYND1 %llx ", err->vendor.amd.synd1);
+                if (err->vendor.amd.synd2)
+                        pr_cont("SYND2 %llx ", err->vendor.amd.synd2);
                 if (m->ipid)
                         pr_cont("IPID %llx ", m->ipid);
         }
@@ -214,9 +221,11 @@ static void __print_mce(struct mce *m)
                 m->microcode);
 }
 
-static void print_mce(struct mce *m)
+static void print_mce(struct mce_hw_err *err)
 {
-        __print_mce(m);
+        struct mce *m = &err->m;
+
+        __print_mce(err);
 
         if (m->cpuvendor != X86_VENDOR_AMD && m->cpuvendor != X86_VENDOR_HYGON)
                 pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
@@ -251,7 +260,7 @@ static const char *mce_dump_aux_info(struct mce *m)
         return NULL;
 }
 
-static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
+static noinstr void mce_panic(const char *msg, struct mce_hw_err *final, char *exp)
 {
         struct llist_node *pending;
         struct mce_evt_llist *l;
@@ -282,20 +291,22 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
         pending = mce_gen_pool_prepare_records();
         /* First print corrected ones that are still unlogged */
         llist_for_each_entry(l, pending, llnode) {
-                struct mce *m = &l->mce;
+                struct mce_hw_err *err = &l->err;
+                struct mce *m = &err->m;
                 if (!(m->status & MCI_STATUS_UC)) {
-                        print_mce(m);
+                        print_mce(err);
                         if (!apei_err)
                                 apei_err = apei_write_mce(m);
                 }
         }
         /* Now print uncorrected but with the final one last */
         llist_for_each_entry(l, pending, llnode) {
-                struct mce *m = &l->mce;
+                struct mce_hw_err *err = &l->err;
+                struct mce *m = &err->m;
                 if (!(m->status & MCI_STATUS_UC))
                         continue;
-                if (!final || mce_cmp(m, final)) {
-                        print_mce(m);
+                if (!final || mce_cmp(m, &final->m)) {
+                        print_mce(err);
                         if (!apei_err)
                                 apei_err = apei_write_mce(m);
                 }
@@ -303,12 +314,12 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
         }
         if (final) {
                 print_mce(final);
                 if (!apei_err)
-                        apei_err = apei_write_mce(final);
+                        apei_err = apei_write_mce(&final->m);
         }
         if (exp)
                 pr_emerg(HW_ERR "Machine check: %s\n", exp);
-        memmsg = mce_dump_aux_info(final);
+        memmsg = mce_dump_aux_info(&final->m);
         if (memmsg)
                 pr_emerg(HW_ERR "Machine check: %s\n", memmsg);
@@ -323,9 +334,9 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
          * panic.
          */
         if (kexec_crash_loaded()) {
-                if (final && (final->status & MCI_STATUS_ADDRV)) {
+                if (final && (final->m.status & MCI_STATUS_ADDRV)) {
                         struct page *p;
-                        p = pfn_to_online_page(final->addr >> PAGE_SHIFT);
+                        p = pfn_to_online_page(final->m.addr >> PAGE_SHIFT);
                         if (p)
                                 SetPageHWPoison(p);
                 }
@@ -445,16 +456,18 @@ static noinstr void mce_wrmsrl(u32 msr, u64 v)
  * check into our "mce" struct so that we can use it later to assess
  * the severity of the problem as we read per-bank specific details.
  */
-static noinstr void mce_gather_info(struct mce *m, struct pt_regs *regs)
+static noinstr void mce_gather_info(struct mce_hw_err *err, struct pt_regs *regs)
 {
+        struct mce *m;
+
         /*
          * Enable instrumentation around mce_prep_record() which calls external
          * facilities.
          */
         instrumentation_begin();
-        mce_prep_record(m);
+        mce_prep_record(err);
         instrumentation_end();
+        m = &err->m;
 
         m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
         if (regs) {
                 /*
@@ -574,13 +587,13 @@ EXPORT_SYMBOL_GPL(mce_is_correctable);
 static int mce_early_notifier(struct notifier_block *nb, unsigned long val,
                               void *data)
 {
-        struct mce *m = (struct mce *)data;
+        struct mce_hw_err *err = to_mce_hw_err(data);
 
-        if (!m)
+        if (!err)
                 return NOTIFY_DONE;
 
         /* Emit the trace record: */
-        trace_mce_record(m);
+        trace_mce_record(err);
 
         set_bit(0, &mce_need_notify);
@@ -624,13 +637,13 @@ static struct notifier_block mce_uc_nb = {
 static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
                                 void *data)
 {
-        struct mce *m = (struct mce *)data;
+        struct mce_hw_err *err = to_mce_hw_err(data);
 
-        if (!m)
+        if (!err)
                 return NOTIFY_DONE;
 
-        if (mca_cfg.print_all || !m->kflags)
-                __print_mce(m);
+        if (mca_cfg.print_all || !(err->m.kflags))
+                __print_mce(err);
 
         return NOTIFY_DONE;
 }
@@ -644,8 +657,10 @@ static struct notifier_block mce_default_nb = {
 /*
  * Read ADDR and MISC registers.
  */
-static noinstr void mce_read_aux(struct mce *m, int i)
+static noinstr void mce_read_aux(struct mce_hw_err *err, int i)
 {
+        struct mce *m = &err->m;
+
         if (m->status & MCI_STATUS_MISCV)
                 m->misc = mce_rdmsrl(mca_msr_reg(i, MCA_MISC));
@@ -667,8 +682,11 @@ static noinstr void mce_read_aux(struct mce *m, int i)
         if (mce_flags.smca) {
                 m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i));
 
-                if (m->status & MCI_STATUS_SYNDV)
+                if (m->status & MCI_STATUS_SYNDV) {
                         m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
+                        err->vendor.amd.synd1 = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND1(i));
+                        err->vendor.amd.synd2 = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND2(i));
+                }
         }
 }
@@ -692,26 +710,28 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
 void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 {
         struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
-        struct mce m;
+        struct mce_hw_err err;
+        struct mce *m;
         int i;
 
         this_cpu_inc(mce_poll_count);
 
-        mce_gather_info(&m, NULL);
+        mce_gather_info(&err, NULL);
+        m = &err.m;
 
         if (flags & MCP_TIMESTAMP)
-                m.tsc = rdtsc();
+                m->tsc = rdtsc();
 
         for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
                 if (!mce_banks[i].ctl || !test_bit(i, *b))
                         continue;
 
-                m.misc = 0;
-                m.addr = 0;
-                m.bank = i;
+                m->misc = 0;
+                m->addr = 0;
+                m->bank = i;
 
                 barrier();
-                m.status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
+                m->status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
 
                 /*
                  * Update storm tracking here, before checking for the
@@ -721,17 +741,17 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
                  * storm status.
                  */
                 if (!mca_cfg.cmci_disabled)
-                        mce_track_storm(&m);
+                        mce_track_storm(m);
 
                 /* If this entry is not valid, ignore it */
-                if (!(m.status & MCI_STATUS_VAL))
+                if (!(m->status & MCI_STATUS_VAL))
                         continue;
 
                 /*
                  * If we are logging everything (at CPU online) or this
                  * is a corrected error, then we must log it.
                  */
-                if ((flags & MCP_UC) || !(m.status & MCI_STATUS_UC))
+                if ((flags & MCP_UC) || !(m->status & MCI_STATUS_UC))
                         goto log_it;
 
                 /*
@@ -741,20 +761,20 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
                  * everything else.
                  */
                 if (!mca_cfg.ser) {
-                        if (m.status & MCI_STATUS_UC)
+                        if (m->status & MCI_STATUS_UC)
                                 continue;
                         goto log_it;
                 }
 
                 /* Log "not enabled" (speculative) errors */
-                if (!(m.status & MCI_STATUS_EN))
+                if (!(m->status & MCI_STATUS_EN))
                         goto log_it;
 
                 /*
                  * Log UCNA (SDM: 15.6.3 "UCR Error Classification")
                  * UC == 1 && PCC == 0 && S == 0
                  */
-                if (!(m.status & MCI_STATUS_PCC) && !(m.status & MCI_STATUS_S))
+                if (!(m->status & MCI_STATUS_PCC) && !(m->status & MCI_STATUS_S))
                         goto log_it;
 
                 /*
@@ -768,20 +788,20 @@ log_it:
                 if (flags & MCP_DONTLOG)
                         goto clear_it;
 
-                mce_read_aux(&m, i);
-                m.severity = mce_severity(&m, NULL, NULL, false);
+                mce_read_aux(&err, i);
+                m->severity = mce_severity(m, NULL, NULL, false);
 
                 /*
                  * Don't get the IP here because it's unlikely to
                  * have anything to do with the actual error location.
                  */
-                if (mca_cfg.dont_log_ce && !mce_usable_address(&m))
+                if (mca_cfg.dont_log_ce && !mce_usable_address(m))
                         goto clear_it;
 
                 if (flags & MCP_QUEUE_LOG)
-                        mce_gen_pool_add(&m);
+                        mce_gen_pool_add(&err);
                 else
-                        mce_log(&m);
+                        mce_log(&err);
 
 clear_it:
                 /*
@@ -905,9 +925,10 @@ static __always_inline void quirk_zen_ifu(int bank, struct mce *m, struct pt_regs
  * Do a quick check if any of the events requires a panic.
  * This decides if we keep the events around or clear them.
  */
-static __always_inline int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
+static __always_inline int mce_no_way_out(struct mce_hw_err *err, char **msg, unsigned long *validp,
                                           struct pt_regs *regs)
 {
+        struct mce *m = &err->m;
         char *tmp = *msg;
         int i;
@@ -925,7 +946,7 @@ static __always_inline int mce_no_way_out(struct mce *m, char **msg, unsigned lo
                         m->bank = i;
                         if (mce_severity(m, regs, &tmp, true) >= MCE_PANIC_SEVERITY) {
-                                mce_read_aux(m, i);
+                                mce_read_aux(err, i);
                                 *msg = tmp;
                                 return 1;
                         }
@@ -1016,10 +1037,11 @@ out:
  */
 static void mce_reign(void)
 {
-        int cpu;
+        struct mce_hw_err *err = NULL;
         struct mce *m = NULL;
         int global_worst = 0;
         char *msg = NULL;
+        int cpu;
 
         /*
          * This CPU is the Monarch and the other CPUs have run
@@ -1027,11 +1049,13 @@ static void mce_reign(void)
          * Grade the severity of the errors of all the CPUs.
          */
         for_each_possible_cpu(cpu) {
-                struct mce *mtmp = &per_cpu(mces_seen, cpu);
+                struct mce_hw_err *etmp = &per_cpu(hw_errs_seen, cpu);
+                struct mce *mtmp = &etmp->m;
 
                 if (mtmp->severity > global_worst) {
                         global_worst = mtmp->severity;
-                        m = &per_cpu(mces_seen, cpu);
+                        err = &per_cpu(hw_errs_seen, cpu);
+                        m = &err->m;
                 }
         }
@@ -1043,7 +1067,7 @@ static void mce_reign(void)
         if (m && global_worst >= MCE_PANIC_SEVERITY) {
                 /* call mce_severity() to get "msg" for panic */
                 mce_severity(m, NULL, &msg, true);
-                mce_panic("Fatal machine check", m, msg);
+                mce_panic("Fatal machine check", err, msg);
         }
@@ -1060,11 +1084,11 @@ static void mce_reign(void)
                 mce_panic("Fatal machine check from unknown source", NULL, NULL);
 
         /*
-         * Now clear all the mces_seen so that they don't reappear on
+         * Now clear all the hw_errs_seen so that they don't reappear on
          * the next mce.
          */
         for_each_possible_cpu(cpu)
-                memset(&per_cpu(mces_seen, cpu), 0, sizeof(struct mce));
+                memset(&per_cpu(hw_errs_seen, cpu), 0, sizeof(struct mce_hw_err));
 }
 
 static atomic_t global_nwo;
@@ -1268,13 +1292,14 @@ static noinstr bool mce_check_crashing_cpu(void)
 }
 
 static __always_inline int
-__mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
-                unsigned long *toclear, unsigned long *valid_banks, int no_way_out,
-                int *worst)
+__mc_scan_banks(struct mce_hw_err *err, struct pt_regs *regs,
+                struct mce_hw_err *final, unsigned long *toclear,
+                unsigned long *valid_banks, int no_way_out, int *worst)
 {
         struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
         struct mca_config *cfg = &mca_cfg;
         int severity, i, taint = 0;
+        struct mce *m = &err->m;
 
         for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
                 arch___clear_bit(i, toclear);
@@ -1319,7 +1344,7 @@ __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
                 if (severity == MCE_NO_SEVERITY)
                         continue;
 
-                mce_read_aux(m, i);
+                mce_read_aux(err, i);
 
                 /* assuming valid severity level != 0 */
                 m->severity = severity;
@@ -1329,17 +1354,17 @@ __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
                  * done in #MC context, where instrumentation is disabled.
                  */
                 instrumentation_begin();
-                mce_log(m);
+                mce_log(err);
                 instrumentation_end();
 
                 if (severity > *worst) {
-                        *final = *m;
+                        *final = *err;
                         *worst = severity;
                 }
         }
 
         /* mce_clear_state will clear *final, save locally for use later */
-        *m = *final;
+        *err = *final;
 
         return taint;
 }
@@ -1399,9 +1424,10 @@ static void kill_me_never(struct callback_head *cb)
                 set_mce_nospec(pfn);
 }
 
-static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callback_head *))
+static void queue_task_work(struct mce_hw_err *err, char *msg, void (*func)(struct callback_head *))
 {
         int count = ++current->mce_count;
+        struct mce *m = &err->m;
 
         /* First call, save all the details */
         if (count == 1) {
@@ -1414,11 +1440,12 @@ static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callba
 
         /* Ten is likely overkill. Don't expect more than two faults before task_work() */
         if (count > 10)
-                mce_panic("Too many consecutive machine checks while accessing user data", m, msg);
+                mce_panic("Too many consecutive machine checks while accessing user data",
+                          err, msg);
 
         /* Second or later call, make sure page address matches the one from first call */
         if (count > 1 && (current->mce_addr >> PAGE_SHIFT) != (m->addr >> PAGE_SHIFT))
-                mce_panic("Consecutive machine checks to different user pages", m, msg);
+                mce_panic("Consecutive machine checks to different user pages", err, msg);
 
         /* Do not call task_work_add() more than once */
         if (count > 1)
@@ -1467,8 +1494,10 @@ noinstr void do_machine_check(struct pt_regs *regs)
         int worst = 0, order, no_way_out, kill_current_task, lmce, taint = 0;
         DECLARE_BITMAP(valid_banks, MAX_NR_BANKS) = { 0 };
         DECLARE_BITMAP(toclear, MAX_NR_BANKS) = { 0 };
-        struct mce m, *final;
+        struct mce_hw_err *final;
+        struct mce_hw_err err;
         char *msg = NULL;
+        struct mce *m;
 
         if (unlikely(mce_flags.p5))
                 return pentium_machine_check(regs);
@@ -1506,13 +1535,14 @@ noinstr void do_machine_check(struct pt_regs *regs)
 
         this_cpu_inc(mce_exception_count);
 
-        mce_gather_info(&m, regs);
-        m.tsc = rdtsc();
+        mce_gather_info(&err, regs);
+        m = &err.m;
+        m->tsc = rdtsc();
 
-        final = this_cpu_ptr(&mces_seen);
-        *final = m;
+        final = this_cpu_ptr(&hw_errs_seen);
+        *final = err;
 
-        no_way_out = mce_no_way_out(&m, &msg, valid_banks, regs);
+        no_way_out = mce_no_way_out(&err, &msg, valid_banks, regs);
 
         barrier();
@@ -1521,15 +1551,15 @@ noinstr void do_machine_check(struct pt_regs *regs)
          * Assume the worst for now, but if we find the
          * severity is MCE_AR_SEVERITY we have other options.
          */
-        if (!(m.mcgstatus & MCG_STATUS_RIPV))
+        if (!(m->mcgstatus & MCG_STATUS_RIPV))
                 kill_current_task = 1;
         /*
          * Check if this MCE is signaled to only this logical processor,
         * on Intel, Zhaoxin only.
          */
-        if (m.cpuvendor == X86_VENDOR_INTEL ||
-            m.cpuvendor == X86_VENDOR_ZHAOXIN)
-                lmce = m.mcgstatus & MCG_STATUS_LMCES;
+        if (m->cpuvendor == X86_VENDOR_INTEL ||
+            m->cpuvendor == X86_VENDOR_ZHAOXIN)
+                lmce = m->mcgstatus & MCG_STATUS_LMCES;
 
         /*
          * Local machine check may already know that we have to panic.
@@ -1540,12 +1570,12 @@ noinstr void do_machine_check(struct pt_regs *regs)
          */
         if (lmce) {
                 if (no_way_out)
-                        mce_panic("Fatal local machine check", &m, msg);
+                        mce_panic("Fatal local machine check", &err, msg);
         } else {
                 order = mce_start(&no_way_out);
         }
 
-        taint = __mc_scan_banks(&m, regs, final, toclear, valid_banks, no_way_out, &worst);
+        taint = __mc_scan_banks(&err, regs, final, toclear, valid_banks, no_way_out, &worst);
 
         if (!no_way_out)
                 mce_clear_state(toclear);
@@ -1560,7 +1590,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
                 no_way_out = worst >= MCE_PANIC_SEVERITY;
 
                 if (no_way_out)
-                        mce_panic("Fatal machine check on current CPU", &m, msg);
+                        mce_panic("Fatal machine check on current CPU", &err, msg);
                 }
         } else {
                 /*
@@ -1572,8 +1602,8 @@ noinstr void do_machine_check(struct pt_regs *regs)
                  * make sure we have the right "msg".
                  */
                 if (worst >= MCE_PANIC_SEVERITY) {
-                        mce_severity(&m, regs, &msg, true);
-                        mce_panic("Local fatal machine check!", &m, msg);
+                        mce_severity(m, regs, &msg, true);
+                        mce_panic("Local fatal machine check!", &err, msg);
                 }
         }
@@ -1591,16 +1621,16 @@ noinstr void do_machine_check(struct pt_regs *regs)
                 goto out;
 
         /* Fault was in user mode and we need to take some action */
-        if ((m.cs & 3) == 3) {
+        if ((m->cs & 3) == 3) {
                 /* If this triggers there is no way to recover. Die hard. */
                 BUG_ON(!on_thread_stack() || !user_mode(regs));
 
-                if (!mce_usable_address(&m))
-                        queue_task_work(&m, msg, kill_me_now);
+                if (!mce_usable_address(m))
+                        queue_task_work(&err, msg, kill_me_now);
                 else
-                        queue_task_work(&m, msg, kill_me_maybe);
+                        queue_task_work(&err, msg, kill_me_maybe);
 
-        } else if (m.mcgstatus & MCG_STATUS_SEAM_NR) {
+        } else if (m->mcgstatus & MCG_STATUS_SEAM_NR) {
                 /*
                  * Saved RIP on stack makes it look like the machine check
                  * was taken in the kernel on the instruction following
@@ -1612,8 +1642,8 @@ noinstr void do_machine_check(struct pt_regs *regs)
                  * not occur there. Mark the page as poisoned so it won't
                  * be added to free list when the guest is terminated.
                  */
-                if (mce_usable_address(&m)) {
-                        struct page *p = pfn_to_online_page(m.addr >> PAGE_SHIFT);
+                if (mce_usable_address(m)) {
+                        struct page *p = pfn_to_online_page(m->addr >> PAGE_SHIFT);
 
                         if (p)
                                 SetPageHWPoison(p);
@@ -1628,13 +1658,13 @@ noinstr void do_machine_check(struct pt_regs *regs)
                  * corresponding exception handler which would do that is the
                  * proper one.
                  */
-                if (m.kflags & MCE_IN_KERNEL_RECOV) {
+                if (m->kflags & MCE_IN_KERNEL_RECOV) {
                         if (!fixup_exception(regs, X86_TRAP_MC, 0, 0))
-                                mce_panic("Failed kernel mode recovery", &m, msg);
+                                mce_panic("Failed kernel mode recovery", &err, msg);
                 }
 
-                if (m.kflags & MCE_IN_KERNEL_COPYIN)
-                        queue_task_work(&m, msg, kill_me_never);
+                if (m->kflags & MCE_IN_KERNEL_COPYIN)
+                        queue_task_work(&err, msg, kill_me_never);
         }
 
 out:

--- a/arch/x86/kernel/cpu/mce/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mce/dev-mcelog.c
@@ -264,15 +264,8 @@ static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
                 return put_user(sizeof(struct mce), p);
         case MCE_GET_LOG_LEN:
                 return put_user(mcelog->len, p);
-        case MCE_GETCLEAR_FLAGS: {
-                unsigned flags;
-
-                do {
-                        flags = mcelog->flags;
-                } while (cmpxchg(&mcelog->flags, flags, 0) != flags);
-
-                return put_user(flags, p);
-        }
+        case MCE_GETCLEAR_FLAGS:
+                return put_user(xchg(&mcelog->flags, 0), p);
         default:
                 return -ENOTTY;
         }
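
The replaced cmpxchg() loop and the new xchg() are equivalent ways to atomically read and clear a word; the exchange does it in a single operation instead of retrying. A standalone C11 sketch of both shapes:

    #include <stdatomic.h>

    static _Atomic unsigned int flags;

    unsigned int getclear_loop(void)        /* old shape: retry until unchanged */
    {
            unsigned int f;
            do {
                    f = atomic_load(&flags);
            } while (!atomic_compare_exchange_weak(&flags, &f, 0));
            return f;
    }

    unsigned int getclear_xchg(void)        /* new shape: one atomic exchange */
    {
            return atomic_exchange(&flags, 0);
    }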

--- a/arch/x86/kernel/cpu/mce/genpool.c
+++ b/arch/x86/kernel/cpu/mce/genpool.c
@@ -31,15 +31,15 @@ static LLIST_HEAD(mce_event_llist);
  */
 static bool is_duplicate_mce_record(struct mce_evt_llist *t, struct mce_evt_llist *l)
 {
+        struct mce_hw_err *err1, *err2;
         struct mce_evt_llist *node;
-        struct mce *m1, *m2;
 
-        m1 = &t->mce;
+        err1 = &t->err;
 
         llist_for_each_entry(node, &l->llnode, llnode) {
-                m2 = &node->mce;
+                err2 = &node->err;
 
-                if (!mce_cmp(m1, m2))
+                if (!mce_cmp(&err1->m, &err2->m))
                         return true;
         }
         return false;
@@ -73,8 +73,8 @@ struct llist_node *mce_gen_pool_prepare_records(void)
 void mce_gen_pool_process(struct work_struct *__unused)
 {
-        struct llist_node *head;
         struct mce_evt_llist *node, *tmp;
+        struct llist_node *head;
         struct mce *mce;
 
         head = llist_del_all(&mce_event_llist);
@@ -83,7 +83,7 @@ void mce_gen_pool_process(struct work_struct *__unused)
         head = llist_reverse_order(head);
         llist_for_each_entry_safe(node, tmp, head, llnode) {
-                mce = &node->mce;
+                mce = &node->err.m;
                 blocking_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
                 gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node));
         }
@@ -94,11 +94,11 @@ bool mce_gen_pool_empty(void)
         return llist_empty(&mce_event_llist);
 }
 
-int mce_gen_pool_add(struct mce *mce)
+int mce_gen_pool_add(struct mce_hw_err *err)
 {
         struct mce_evt_llist *node;
 
-        if (filter_mce(mce))
+        if (filter_mce(&err->m))
                 return -EINVAL;
 
         if (!mce_evt_pool)
@@ -110,7 +110,7 @@ int mce_gen_pool_add(struct mce *mce)
                 return -ENOMEM;
         }
 
-        memcpy(&node->mce, mce, sizeof(*mce));
+        memcpy(&node->err, err, sizeof(*err));
         llist_add(&node->llnode, &mce_event_llist);
 
         return 0;

--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -502,8 +502,9 @@ static void prepare_msrs(void *info)
 
 static void do_inject(void)
 {
-        u64 mcg_status = 0;
         unsigned int cpu = i_mce.extcpu;
+        struct mce_hw_err err;
+        u64 mcg_status = 0;
         u8 b = i_mce.bank;
 
         i_mce.tsc = rdtsc_ordered();
@@ -517,7 +518,8 @@ static void do_inject(void)
                 i_mce.status |= MCI_STATUS_SYNDV;
 
         if (inj_type == SW_INJ) {
-                mce_log(&i_mce);
+                err.m = i_mce;
+                mce_log(&err);
                 return;
         }

--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -94,7 +94,7 @@ static int cmci_supported(int *banks)
         if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
                 return 0;
         rdmsrl(MSR_IA32_MCG_CAP, cap);
-        *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
+        *banks = min_t(unsigned, MAX_NR_BANKS, cap & MCG_BANKCNT_MASK);
         return !!(cap & MCG_CMCI_P);
 }

--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -26,12 +26,12 @@ extern struct blocking_notifier_head x86_mce_decoder_chain;
 
 struct mce_evt_llist {
         struct llist_node llnode;
-        struct mce mce;
+        struct mce_hw_err err;
 };
 
 void mce_gen_pool_process(struct work_struct *__unused);
 bool mce_gen_pool_empty(void);
-int mce_gen_pool_add(struct mce *mce);
+int mce_gen_pool_add(struct mce_hw_err *err);
 int mce_gen_pool_init(void);
 struct llist_node *mce_gen_pool_prepare_records(void);

--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -793,7 +793,9 @@ static int
 amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
 {
         struct mce *m = (struct mce *)data;
+        struct mce_hw_err *err = to_mce_hw_err(m);
         unsigned int fam = x86_family(m->cpuid);
+        u32 mca_config_lo = 0, dummy;
         int ecc;
 
         if (m->kflags & MCE_HANDLED_CEC)
@@ -813,11 +815,9 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
                 ((m->status & MCI_STATUS_PCC)   ? "PCC"   : "-"));
 
         if (boot_cpu_has(X86_FEATURE_SMCA)) {
-                u32 low, high;
-                u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
+                rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(m->bank), &mca_config_lo, &dummy);
 
-                if (!rdmsr_safe(addr, &low, &high) &&
-                    (low & MCI_CONFIG_MCAX))
+                if (mca_config_lo & MCI_CONFIG_MCAX)
                         pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
 
                 pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-"));
@@ -850,8 +850,18 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
         if (boot_cpu_has(X86_FEATURE_SMCA)) {
                 pr_emerg(HW_ERR "IPID: 0x%016llx", m->ipid);
 
-                if (m->status & MCI_STATUS_SYNDV)
-                        pr_cont(", Syndrome: 0x%016llx", m->synd);
+                if (m->status & MCI_STATUS_SYNDV) {
+                        pr_cont(", Syndrome: 0x%016llx\n", m->synd);
+                        if (mca_config_lo & MCI_CONFIG_FRUTEXT) {
+                                char frutext[17];
+
+                                frutext[16] = '\0';
+                                memcpy(&frutext[0], &err->vendor.amd.synd1, 8);
+                                memcpy(&frutext[8], &err->vendor.amd.synd2, 8);
+
+                                pr_emerg(HW_ERR "FRU Text: %s", frutext);
+                        }
+                }
 
                 pr_cont("\n");
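
The FRU text handling above concatenates the two 64-bit syndrome registers into a 16-character, NUL-terminated string. A standalone sketch with made-up register contents:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            /* hypothetical register values spelling "FRU_EXAMPLE_0001" */
            uint64_t synd1, synd2;
            char frutext[17];

            memcpy(&synd1, "FRU_EXAM", 8);        /* pure byte copies, so the */
            memcpy(&synd2, "PLE_0001", 8);        /* round trip is endian-safe */

            memcpy(&frutext[0], &synd1, 8);
            memcpy(&frutext[8], &synd2, 8);
            frutext[16] = '\0';

            printf("FRU Text: %s\n", frutext);    /* -> FRU_EXAMPLE_0001 */
            return 0;
    }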

--- a/include/trace/events/mce.h
+++ b/include/trace/events/mce.h
@@ -19,9 +19,9 @@
 
 TRACE_EVENT(mce_record,
 
-        TP_PROTO(struct mce *m),
+        TP_PROTO(struct mce_hw_err *err),
 
-        TP_ARGS(m),
+        TP_ARGS(err),
 
         TP_STRUCT__entry(
                 __field( u64, mcgcap )
@@ -43,31 +43,33 @@ TRACE_EVENT(mce_record,
                 __field( u8,  bank )
                 __field( u8,  cpuvendor )
                 __field( u32, microcode )
+                __dynamic_array(u8, v_data, sizeof(err->vendor))
         ),
 
         TP_fast_assign(
-                __entry->mcgcap     = m->mcgcap;
-                __entry->mcgstatus  = m->mcgstatus;
-                __entry->status     = m->status;
-                __entry->addr       = m->addr;
-                __entry->misc       = m->misc;
-                __entry->synd       = m->synd;
-                __entry->ipid       = m->ipid;
-                __entry->ip         = m->ip;
-                __entry->tsc        = m->tsc;
-                __entry->ppin       = m->ppin;
-                __entry->walltime   = m->time;
-                __entry->cpu        = m->extcpu;
-                __entry->cpuid      = m->cpuid;
-                __entry->apicid     = m->apicid;
-                __entry->socketid   = m->socketid;
-                __entry->cs         = m->cs;
-                __entry->bank       = m->bank;
-                __entry->cpuvendor  = m->cpuvendor;
-                __entry->microcode  = m->microcode;
+                __entry->mcgcap     = err->m.mcgcap;
+                __entry->mcgstatus  = err->m.mcgstatus;
+                __entry->status     = err->m.status;
+                __entry->addr       = err->m.addr;
+                __entry->misc       = err->m.misc;
+                __entry->synd       = err->m.synd;
+                __entry->ipid       = err->m.ipid;
+                __entry->ip         = err->m.ip;
+                __entry->tsc        = err->m.tsc;
+                __entry->ppin       = err->m.ppin;
+                __entry->walltime   = err->m.time;
+                __entry->cpu        = err->m.extcpu;
+                __entry->cpuid      = err->m.cpuid;
+                __entry->apicid     = err->m.apicid;
+                __entry->socketid   = err->m.socketid;
+                __entry->cs         = err->m.cs;
+                __entry->bank       = err->m.bank;
+                __entry->cpuvendor  = err->m.cpuvendor;
+                __entry->microcode  = err->m.microcode;
+                memcpy(__get_dynamic_array(v_data), &err->vendor, sizeof(err->vendor));
         ),
 
-        TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, ADDR: %016Lx, MISC: %016Lx, SYND: %016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PPIN: %llx, vendor: %u, CPUID: %x, time: %llu, socket: %u, APIC: %x, microcode: %x",
+        TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016llx, IPID: %016llx, ADDR: %016llx, MISC: %016llx, SYND: %016llx, RIP: %02x:<%016llx>, TSC: %llx, PPIN: %llx, vendor: %u, CPUID: %x, time: %llu, socket: %u, APIC: %x, microcode: %x, vendor data: %s",
                 __entry->cpu,
                 __entry->mcgcap, __entry->mcgstatus,
                 __entry->bank, __entry->status,
@@ -83,7 +85,8 @@ TRACE_EVENT(mce_record,
                 __entry->walltime,
                 __entry->socketid,
                 __entry->apicid,
-                __entry->microcode)
+                __entry->microcode,
+                __print_dynamic_array(v_data, sizeof(u8)))
 );
 
 #endif /* _TRACE_MCE_H */

--- a/include/trace/stages/stage3_trace_output.h
+++ b/include/trace/stages/stage3_trace_output.h
@@ -119,6 +119,14 @@
                 trace_print_array_seq(p, array, count, el_size);        \
         })
 
+#undef __print_dynamic_array
+#define __print_dynamic_array(array, el_size)                           \
+        ({                                                              \
+                __print_array(__get_dynamic_array(array),               \
+                              __get_dynamic_array_len(array) / (el_size), \
+                              (el_size));                               \
+        })
+
 #undef __print_hex_dump
 #define __print_hex_dump(prefix_str, prefix_type,                       \
                          rowsize, groupsize, buf, len, ascii)           \

--- a/include/trace/stages/stage7_class_define.h
+++ b/include/trace/stages/stage7_class_define.h
@@ -22,6 +22,7 @@
 #undef __get_rel_cpumask
 #undef __get_rel_sockaddr
 #undef __print_array
+#undef __print_dynamic_array
 #undef __print_hex_dump
 #undef __get_buf

--- a/samples/trace_events/trace-events-sample.h
+++ b/samples/trace_events/trace-events-sample.h
@@ -319,7 +319,7 @@ TRACE_EVENT(foo_bar,
                 __assign_cpumask(cpum, cpumask_bits(mask));
         ),
 
-        TP_printk("foo %s %d %s %s %s %s %s (%s) (%s) %s", __entry->foo, __entry->bar,
+        TP_printk("foo %s %d %s %s %s %s %s %s (%s) (%s) %s", __entry->foo, __entry->bar,
 
 /*
  * Notice here the use of some helper functions. This includes:
@@ -363,6 +363,11 @@ TRACE_EVENT(foo_bar,
                    __print_array(__get_dynamic_array(list),
                                  __get_dynamic_array_len(list) / sizeof(int),
                                  sizeof(int)),
+
+/* A shortcut is to use __print_dynamic_array for dynamic arrays */
+
+                   __print_dynamic_array(list, sizeof(int)),
+
                    __get_str(str), __get_str(lstr),
                    __get_bitmask(cpus), __get_cpumask(cpum),
                    __get_str(vstr))