mirror of
https://github.com/torvalds/linux.git
synced 2024-11-22 12:11:40 +00:00
Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RAS updates from Borislav Petkov:
 "This time around we have in store:

   - Disable MC4_MISC thresholding banks on all AMD family 0x15 models
     (Shirish S)

   - AMD MCE error descriptions update and error decode improvements
     (Yazen Ghannam)

   - The usual smaller conversions and fixes"

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Improve error message when kernel cannot recover, p2
  EDAC/mce_amd: Decode MCA_STATUS in bit definition order
  EDAC/mce_amd: Decode MCA_STATUS[Scrub] bit
  EDAC, mce_amd: Print ExtErrorCode and description on a single line
  EDAC, mce_amd: Match error descriptions to latest documentation
  x86/MCE/AMD, EDAC/mce_amd: Add new error descriptions for some SMCA bank types
  x86/MCE/AMD, EDAC/mce_amd: Add new McaTypes for CS, PSP, and SMU units
  x86/MCE/AMD, EDAC/mce_amd: Add new MP5, NBIO, and PCIE SMCA bank types
  RAS: Add a MAINTAINERS entry
  RAS: Use consistent types for UUIDs
  x86/MCE/AMD: Carve out the MC4_MISC thresholding quirk
  x86/MCE/AMD: Turn off MC4_MISC thresholding on all family 0x15 models
  x86/MCE: Switch to use the new generic UUID API
This commit is contained in:
commit
e13284da94
10
MAINTAINERS
10
MAINTAINERS
@ -12960,6 +12960,16 @@ M: Alexandre Bounine <alex.bou9@gmail.com>
|
||||
S: Maintained
|
||||
F: drivers/rapidio/
|
||||
|
||||
RAS INFRASTRUCTURE
|
||||
M: Tony Luck <tony.luck@intel.com>
|
||||
M: Borislav Petkov <bp@alien8.de>
|
||||
L: linux-edac@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/ras/
|
||||
F: include/linux/ras.h
|
||||
F: include/ras/ras_event.h
|
||||
F: Documentation/admin-guide/ras.rst
|
||||
|
||||
RAYLINK/WEBGEAR 802.11 WIRELESS LAN DRIVER
|
||||
L: linux-wireless@vger.kernel.org
|
||||
S: Orphan
|
||||
|
@ -48,6 +48,7 @@
|
||||
#define MCI_STATUS_SYNDV BIT_ULL(53) /* synd reg. valid */
|
||||
#define MCI_STATUS_DEFERRED BIT_ULL(44) /* uncorrected error, deferred exception */
|
||||
#define MCI_STATUS_POISON BIT_ULL(43) /* access poisonous data */
|
||||
#define MCI_STATUS_SCRUB BIT_ULL(40) /* Error detected during scrub operation */
|
||||
|
||||
/*
|
||||
* McaX field if set indicates a given bank supports MCA extensions:
|
||||
@ -307,11 +308,17 @@ enum smca_bank_types {
|
||||
SMCA_FP, /* Floating Point */
|
||||
SMCA_L3_CACHE, /* L3 Cache */
|
||||
SMCA_CS, /* Coherent Slave */
|
||||
SMCA_CS_V2, /* Coherent Slave */
|
||||
SMCA_PIE, /* Power, Interrupts, etc. */
|
||||
SMCA_UMC, /* Unified Memory Controller */
|
||||
SMCA_PB, /* Parameter Block */
|
||||
SMCA_PSP, /* Platform Security Processor */
|
||||
SMCA_PSP_V2, /* Platform Security Processor */
|
||||
SMCA_SMU, /* System Management Unit */
|
||||
SMCA_SMU_V2, /* System Management Unit */
|
||||
SMCA_MP5, /* Microprocessor 5 Unit */
|
||||
SMCA_NBIO, /* Northbridge IO Unit */
|
||||
SMCA_PCIE, /* PCI Express Unit */
|
||||
N_SMCA_BANK_TYPES
|
||||
};
|
||||
|
||||
|
@ -88,11 +88,17 @@ static struct smca_bank_name smca_names[] = {
|
||||
[SMCA_FP] = { "floating_point", "Floating Point Unit" },
|
||||
[SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" },
|
||||
[SMCA_CS] = { "coherent_slave", "Coherent Slave" },
|
||||
[SMCA_CS_V2] = { "coherent_slave", "Coherent Slave" },
|
||||
[SMCA_PIE] = { "pie", "Power, Interrupts, etc." },
|
||||
[SMCA_UMC] = { "umc", "Unified Memory Controller" },
|
||||
[SMCA_PB] = { "param_block", "Parameter Block" },
|
||||
[SMCA_PSP] = { "psp", "Platform Security Processor" },
|
||||
[SMCA_PSP_V2] = { "psp", "Platform Security Processor" },
|
||||
[SMCA_SMU] = { "smu", "System Management Unit" },
|
||||
[SMCA_SMU_V2] = { "smu", "System Management Unit" },
|
||||
[SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" },
|
||||
[SMCA_NBIO] = { "nbio", "Northbridge IO Unit" },
|
||||
[SMCA_PCIE] = { "pcie", "PCI Express Unit" },
|
||||
};
|
||||
|
||||
static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
|
||||
@ -138,30 +144,42 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
|
||||
{ SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 },
|
||||
|
||||
/* ZN Core (HWID=0xB0) MCA types */
|
||||
{ SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
|
||||
{ SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF },
|
||||
{ SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
|
||||
{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
|
||||
{ SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF },
|
||||
/* HWID 0xB0 MCATYPE 0x4 is Reserved */
|
||||
{ SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0x7FF },
|
||||
{ SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0xFFF },
|
||||
{ SMCA_FP, HWID_MCATYPE(0xB0, 0x6), 0x7F },
|
||||
{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF },
|
||||
|
||||
/* Data Fabric MCA types */
|
||||
{ SMCA_CS, HWID_MCATYPE(0x2E, 0x0), 0x1FF },
|
||||
{ SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0xF },
|
||||
{ SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0x1F },
|
||||
{ SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2), 0x3FFF },
|
||||
|
||||
/* Unified Memory Controller MCA type */
|
||||
{ SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0x3F },
|
||||
{ SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0xFF },
|
||||
|
||||
/* Parameter Block MCA type */
|
||||
{ SMCA_PB, HWID_MCATYPE(0x05, 0x0), 0x1 },
|
||||
|
||||
/* Platform Security Processor MCA type */
|
||||
{ SMCA_PSP, HWID_MCATYPE(0xFF, 0x0), 0x1 },
|
||||
{ SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1), 0x3FFFF },
|
||||
|
||||
/* System Management Unit MCA type */
|
||||
{ SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 },
|
||||
{ SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1), 0x7FF },
|
||||
|
||||
/* Microprocessor 5 Unit MCA type */
|
||||
{ SMCA_MP5, HWID_MCATYPE(0x01, 0x2), 0x3FF },
|
||||
|
||||
/* Northbridge IO Unit MCA type */
|
||||
{ SMCA_NBIO, HWID_MCATYPE(0x18, 0x0), 0x1F },
|
||||
|
||||
/* PCI Express Unit MCA type */
|
||||
{ SMCA_PCIE, HWID_MCATYPE(0x46, 0x0), 0x1F },
|
||||
};
|
||||
|
||||
struct smca_bank smca_banks[MAX_NR_BANKS];
|
||||
@ -545,6 +563,40 @@ out:
|
||||
return offset;
|
||||
}
|
||||
|
||||
/*
|
||||
* Turn off MC4_MISC thresholding banks on all family 0x15 models since
|
||||
* they're not supported there.
|
||||
*/
|
||||
void disable_err_thresholding(struct cpuinfo_x86 *c)
|
||||
{
|
||||
int i;
|
||||
u64 hwcr;
|
||||
bool need_toggle;
|
||||
u32 msrs[] = {
|
||||
0x00000413, /* MC4_MISC0 */
|
||||
0xc0000408, /* MC4_MISC1 */
|
||||
};
|
||||
|
||||
if (c->x86 != 0x15)
|
||||
return;
|
||||
|
||||
rdmsrl(MSR_K7_HWCR, hwcr);
|
||||
|
||||
/* McStatusWrEn has to be set */
|
||||
need_toggle = !(hwcr & BIT(18));
|
||||
|
||||
if (need_toggle)
|
||||
wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
|
||||
|
||||
/* Clear CntP bit safely */
|
||||
for (i = 0; i < ARRAY_SIZE(msrs); i++)
|
||||
msr_clear_bit(msrs[i], 62);
|
||||
|
||||
/* restore old settings */
|
||||
if (need_toggle)
|
||||
wrmsrl(MSR_K7_HWCR, hwcr);
|
||||
}
|
||||
|
||||
/* cpu init entry point, called from mce.c with preempt off */
|
||||
void mce_amd_feature_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
@ -552,6 +604,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
|
||||
unsigned int bank, block, cpu = smp_processor_id();
|
||||
int offset = -1;
|
||||
|
||||
disable_err_thresholding(c);
|
||||
|
||||
for (bank = 0; bank < mca_cfg.banks; ++bank) {
|
||||
if (mce_flags.smca)
|
||||
smca_configure(bank, cpu);
|
||||
|
@ -64,10 +64,10 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
|
||||
EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
|
||||
|
||||
#define CPER_CREATOR_MCE \
|
||||
UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
|
||||
GUID_INIT(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
|
||||
0x64, 0x90, 0xb8, 0x9d)
|
||||
#define CPER_SECTION_TYPE_MCE \
|
||||
UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \
|
||||
GUID_INIT(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \
|
||||
0x04, 0x4a, 0x38, 0xfc)
|
||||
|
||||
/*
|
||||
@ -135,7 +135,7 @@ retry:
|
||||
goto out;
|
||||
/* try to skip other type records in storage */
|
||||
else if (rc != sizeof(rcd) ||
|
||||
uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE))
|
||||
!guid_equal(&rcd.hdr.creator_id, &CPER_CREATOR_MCE))
|
||||
goto retry;
|
||||
memcpy(m, &rcd.mce, sizeof(*m));
|
||||
rc = sizeof(*m);
|
||||
|
@ -1612,36 +1612,6 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
|
||||
if (c->x86 == 0x15 && c->x86_model <= 0xf)
|
||||
mce_flags.overflow_recov = 1;
|
||||
|
||||
/*
|
||||
* Turn off MC4_MISC thresholding banks on those models since
|
||||
* they're not supported there.
|
||||
*/
|
||||
if (c->x86 == 0x15 &&
|
||||
(c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
|
||||
int i;
|
||||
u64 hwcr;
|
||||
bool need_toggle;
|
||||
u32 msrs[] = {
|
||||
0x00000413, /* MC4_MISC0 */
|
||||
0xc0000408, /* MC4_MISC1 */
|
||||
};
|
||||
|
||||
rdmsrl(MSR_K7_HWCR, hwcr);
|
||||
|
||||
/* McStatusWrEn has to be set */
|
||||
need_toggle = !(hwcr & BIT(18));
|
||||
|
||||
if (need_toggle)
|
||||
wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
|
||||
|
||||
/* Clear CntP bit safely */
|
||||
for (i = 0; i < ARRAY_SIZE(msrs); i++)
|
||||
msr_clear_bit(msrs[i], 62);
|
||||
|
||||
/* restore old settings */
|
||||
if (need_toggle)
|
||||
wrmsrl(MSR_K7_HWCR, hwcr);
|
||||
}
|
||||
}
|
||||
|
||||
if (c->x86_vendor == X86_VENDOR_INTEL) {
|
||||
|
@ -165,6 +165,11 @@ static struct severity {
|
||||
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
|
||||
KERNEL
|
||||
),
|
||||
MCESEV(
|
||||
PANIC, "Instruction fetch error in kernel",
|
||||
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
|
||||
KERNEL
|
||||
),
|
||||
#endif
|
||||
MCESEV(
|
||||
PANIC, "Action required: unknown MCACOD",
|
||||
|
@ -151,138 +151,223 @@ static const char * const mc6_mce_desc[] = {
|
||||
|
||||
/* Scalable MCA error strings */
|
||||
static const char * const smca_ls_mce_desc[] = {
|
||||
"Load queue parity",
|
||||
"Store queue parity",
|
||||
"Miss address buffer payload parity",
|
||||
"L1 TLB parity",
|
||||
"Reserved",
|
||||
"DC tag error type 6",
|
||||
"DC tag error type 1",
|
||||
"Load queue parity error",
|
||||
"Store queue parity error",
|
||||
"Miss address buffer payload parity error",
|
||||
"Level 1 TLB parity error",
|
||||
"DC Tag error type 5",
|
||||
"DC Tag error type 6",
|
||||
"DC Tag error type 1",
|
||||
"Internal error type 1",
|
||||
"Internal error type 2",
|
||||
"Sys Read data error thread 0",
|
||||
"Sys read data error thread 1",
|
||||
"DC tag error type 2",
|
||||
"DC data error type 1 (poison consumption)",
|
||||
"DC data error type 2",
|
||||
"DC data error type 3",
|
||||
"DC tag error type 4",
|
||||
"L2 TLB parity",
|
||||
"System Read Data Error Thread 0",
|
||||
"System Read Data Error Thread 1",
|
||||
"DC Tag error type 2",
|
||||
"DC Data error type 1 and poison consumption",
|
||||
"DC Data error type 2",
|
||||
"DC Data error type 3",
|
||||
"DC Tag error type 4",
|
||||
"Level 2 TLB parity error",
|
||||
"PDC parity error",
|
||||
"DC tag error type 3",
|
||||
"DC tag error type 5",
|
||||
"L2 fill data error",
|
||||
"DC Tag error type 3",
|
||||
"DC Tag error type 5",
|
||||
"L2 Fill Data error",
|
||||
};
|
||||
|
||||
static const char * const smca_if_mce_desc[] = {
|
||||
"microtag probe port parity error",
|
||||
"IC microtag or full tag multi-hit error",
|
||||
"IC full tag parity",
|
||||
"IC data array parity",
|
||||
"Decoupling queue phys addr parity error",
|
||||
"L0 ITLB parity error",
|
||||
"L1 ITLB parity error",
|
||||
"L2 ITLB parity error",
|
||||
"BPQ snoop parity on Thread 0",
|
||||
"BPQ snoop parity on Thread 1",
|
||||
"L1 BTB multi-match error",
|
||||
"L2 BTB multi-match error",
|
||||
"L2 Cache Response Poison error",
|
||||
"System Read Data error",
|
||||
"Op Cache Microtag Probe Port Parity Error",
|
||||
"IC Microtag or Full Tag Multi-hit Error",
|
||||
"IC Full Tag Parity Error",
|
||||
"IC Data Array Parity Error",
|
||||
"Decoupling Queue PhysAddr Parity Error",
|
||||
"L0 ITLB Parity Error",
|
||||
"L1 ITLB Parity Error",
|
||||
"L2 ITLB Parity Error",
|
||||
"BPQ Thread 0 Snoop Parity Error",
|
||||
"BPQ Thread 1 Snoop Parity Error",
|
||||
"L1 BTB Multi-Match Error",
|
||||
"L2 BTB Multi-Match Error",
|
||||
"L2 Cache Response Poison Error",
|
||||
"System Read Data Error",
|
||||
};
|
||||
|
||||
static const char * const smca_l2_mce_desc[] = {
|
||||
"L2M tag multi-way-hit error",
|
||||
"L2M tag ECC error",
|
||||
"L2M data ECC error",
|
||||
"HW assert",
|
||||
"L2M Tag Multiple-Way-Hit error",
|
||||
"L2M Tag or State Array ECC Error",
|
||||
"L2M Data Array ECC Error",
|
||||
"Hardware Assert Error",
|
||||
};
|
||||
|
||||
static const char * const smca_de_mce_desc[] = {
|
||||
"uop cache tag parity error",
|
||||
"uop cache data parity error",
|
||||
"Insn buffer parity error",
|
||||
"uop queue parity error",
|
||||
"Insn dispatch queue parity error",
|
||||
"Fetch address FIFO parity",
|
||||
"Patch RAM data parity",
|
||||
"Patch RAM sequencer parity",
|
||||
"uop buffer parity"
|
||||
"Micro-op cache tag parity error",
|
||||
"Micro-op cache data parity error",
|
||||
"Instruction buffer parity error",
|
||||
"Micro-op queue parity error",
|
||||
"Instruction dispatch queue parity error",
|
||||
"Fetch address FIFO parity error",
|
||||
"Patch RAM data parity error",
|
||||
"Patch RAM sequencer parity error",
|
||||
"Micro-op buffer parity error"
|
||||
};
|
||||
|
||||
static const char * const smca_ex_mce_desc[] = {
|
||||
"Watchdog timeout error",
|
||||
"Phy register file parity",
|
||||
"Flag register file parity",
|
||||
"Immediate displacement register file parity",
|
||||
"Address generator payload parity",
|
||||
"EX payload parity",
|
||||
"Checkpoint queue parity",
|
||||
"Retire dispatch queue parity",
|
||||
"Watchdog Timeout error",
|
||||
"Physical register file parity error",
|
||||
"Flag register file parity error",
|
||||
"Immediate displacement register file parity error",
|
||||
"Address generator payload parity error",
|
||||
"EX payload parity error",
|
||||
"Checkpoint queue parity error",
|
||||
"Retire dispatch queue parity error",
|
||||
"Retire status queue parity error",
|
||||
"Scheduling queue parity error",
|
||||
"Branch buffer queue parity error",
|
||||
"Hardware Assertion error",
|
||||
};
|
||||
|
||||
static const char * const smca_fp_mce_desc[] = {
|
||||
"Physical register file parity",
|
||||
"Freelist parity error",
|
||||
"Schedule queue parity",
|
||||
"Physical register file (PRF) parity error",
|
||||
"Freelist (FL) parity error",
|
||||
"Schedule queue parity error",
|
||||
"NSQ parity error",
|
||||
"Retire queue parity",
|
||||
"Status register file parity",
|
||||
"Retire queue (RQ) parity error",
|
||||
"Status register file (SRF) parity error",
|
||||
"Hardware assertion",
|
||||
};
|
||||
|
||||
static const char * const smca_l3_mce_desc[] = {
|
||||
"Shadow tag macro ECC error",
|
||||
"Shadow tag macro multi-way-hit error",
|
||||
"L3M tag ECC error",
|
||||
"L3M tag multi-way-hit error",
|
||||
"L3M data ECC error",
|
||||
"XI parity, L3 fill done channel error",
|
||||
"L3 victim queue parity",
|
||||
"L3 HW assert",
|
||||
"Shadow Tag Macro ECC Error",
|
||||
"Shadow Tag Macro Multi-way-hit Error",
|
||||
"L3M Tag ECC Error",
|
||||
"L3M Tag Multi-way-hit Error",
|
||||
"L3M Data ECC Error",
|
||||
"SDP Parity Error or SystemReadDataError from XI",
|
||||
"L3 Victim Queue Parity Error",
|
||||
"L3 Hardware Assertion",
|
||||
};
|
||||
|
||||
static const char * const smca_cs_mce_desc[] = {
|
||||
"Illegal request from transport layer",
|
||||
"Address violation",
|
||||
"Security violation",
|
||||
"Illegal response from transport layer",
|
||||
"Unexpected response",
|
||||
"Parity error on incoming request or probe response data",
|
||||
"Parity error on incoming read response data",
|
||||
"Atomic request parity",
|
||||
"ECC error on probe filter access",
|
||||
"Illegal Request",
|
||||
"Address Violation",
|
||||
"Security Violation",
|
||||
"Illegal Response",
|
||||
"Unexpected Response",
|
||||
"Request or Probe Parity Error",
|
||||
"Read Response Parity Error",
|
||||
"Atomic Request Parity Error",
|
||||
"Probe Filter ECC Error",
|
||||
};
|
||||
|
||||
/*
 * Error descriptions for the SMCA_CS_V2 bank type. decode_smca_error()
 * indexes this table with the extended error code, so every slot is
 * spelled out with its index.
 */
static const char * const smca_cs2_mce_desc[] = {
	[0]  = "Illegal Request",
	[1]  = "Address Violation",
	[2]  = "Security Violation",
	[3]  = "Illegal Response",
	[4]  = "Unexpected Response",
	[5]  = "Request or Probe Parity Error",
	[6]  = "Read Response Parity Error",
	[7]  = "Atomic Request Parity Error",
	[8]  = "SDP read response had no match in the CS queue",
	[9]  = "Probe Filter Protocol Error",
	[10] = "Probe Filter ECC Error",
	[11] = "SDP read response had an unexpected RETRY error",
	[12] = "Counter overflow error",
	[13] = "Counter underflow error",
};
|
||||
|
||||
static const char * const smca_pie_mce_desc[] = {
|
||||
"HW assert",
|
||||
"Internal PIE register security violation",
|
||||
"Error on GMI link",
|
||||
"Poison data written to internal PIE register",
|
||||
"Hardware Assert",
|
||||
"Register security violation",
|
||||
"Link Error",
|
||||
"Poison data consumption",
|
||||
"A deferred error was detected in the DF"
|
||||
};
|
||||
|
||||
static const char * const smca_umc_mce_desc[] = {
|
||||
"DRAM ECC error",
|
||||
"Data poison error on DRAM",
|
||||
"Data poison error",
|
||||
"SDP parity error",
|
||||
"Advanced peripheral bus error",
|
||||
"Command/address parity error",
|
||||
"Address/Command parity error",
|
||||
"Write data CRC error",
|
||||
"DCQ SRAM ECC error",
|
||||
"AES SRAM ECC error",
|
||||
};
|
||||
|
||||
static const char * const smca_pb_mce_desc[] = {
|
||||
"Parameter Block RAM ECC error",
|
||||
"An ECC error in the Parameter Block RAM array",
|
||||
};
|
||||
|
||||
static const char * const smca_psp_mce_desc[] = {
|
||||
"PSP RAM ECC or parity error",
|
||||
"An ECC or parity error in a PSP RAM instance",
|
||||
};
|
||||
|
||||
/*
 * Error descriptions for the SMCA_PSP_V2 bank type. decode_smca_error()
 * indexes this table with the extended error code, so every slot is
 * spelled out with its index.
 */
static const char * const smca_psp2_mce_desc[] = {
	[0]  = "High SRAM ECC or parity error",
	[1]  = "Low SRAM ECC or parity error",
	[2]  = "Instruction Cache Bank 0 ECC or parity error",
	[3]  = "Instruction Cache Bank 1 ECC or parity error",
	[4]  = "Instruction Tag Ram 0 parity error",
	[5]  = "Instruction Tag Ram 1 parity error",
	[6]  = "Data Cache Bank 0 ECC or parity error",
	[7]  = "Data Cache Bank 1 ECC or parity error",
	[8]  = "Data Cache Bank 2 ECC or parity error",
	[9]  = "Data Cache Bank 3 ECC or parity error",
	[10] = "Data Tag Bank 0 parity error",
	[11] = "Data Tag Bank 1 parity error",
	[12] = "Data Tag Bank 2 parity error",
	[13] = "Data Tag Bank 3 parity error",
	[14] = "Dirty Data Ram parity error",
	[15] = "TLB Bank 0 parity error",
	[16] = "TLB Bank 1 parity error",
	[17] = "System Hub Read Buffer ECC or parity error",
};
|
||||
|
||||
static const char * const smca_smu_mce_desc[] = {
|
||||
"SMU RAM ECC or parity error",
|
||||
"An ECC or parity error in an SMU RAM instance",
|
||||
};
|
||||
|
||||
/*
 * Error descriptions for the SMCA_SMU_V2 bank type. decode_smca_error()
 * indexes this table with the extended error code, so every slot is
 * spelled out with its index.
 */
static const char * const smca_smu2_mce_desc[] = {
	[0]  = "High SRAM ECC or parity error",
	[1]  = "Low SRAM ECC or parity error",
	[2]  = "Data Cache Bank A ECC or parity error",
	[3]  = "Data Cache Bank B ECC or parity error",
	[4]  = "Data Tag Cache Bank A ECC or parity error",
	[5]  = "Data Tag Cache Bank B ECC or parity error",
	[6]  = "Instruction Cache Bank A ECC or parity error",
	[7]  = "Instruction Cache Bank B ECC or parity error",
	[8]  = "Instruction Tag Cache Bank A ECC or parity error",
	[9]  = "Instruction Tag Cache Bank B ECC or parity error",
	[10] = "System Hub Read Buffer ECC or parity error",
};
|
||||
|
||||
/*
 * Error descriptions for the SMCA_MP5 bank type. decode_smca_error()
 * indexes this table with the extended error code, so every slot is
 * spelled out with its index.
 */
static const char * const smca_mp5_mce_desc[] = {
	[0] = "High SRAM ECC or parity error",
	[1] = "Low SRAM ECC or parity error",
	[2] = "Data Cache Bank A ECC or parity error",
	[3] = "Data Cache Bank B ECC or parity error",
	[4] = "Data Tag Cache Bank A ECC or parity error",
	[5] = "Data Tag Cache Bank B ECC or parity error",
	[6] = "Instruction Cache Bank A ECC or parity error",
	[7] = "Instruction Cache Bank B ECC or parity error",
	[8] = "Instruction Tag Cache Bank A ECC or parity error",
	[9] = "Instruction Tag Cache Bank B ECC or parity error",
};
|
||||
|
||||
/*
 * Error descriptions for the SMCA_NBIO bank type. decode_smca_error()
 * indexes this table with the extended error code, so every slot is
 * spelled out with its index.
 */
static const char * const smca_nbio_mce_desc[] = {
	[0] = "ECC or Parity error",
	[1] = "PCIE error",
	[2] = "SDP ErrEvent error",
	[3] = "SDP Egress Poison Error",
	[4] = "IOHC Internal Poison Error",
};
|
||||
|
||||
/*
 * Error descriptions for the SMCA_PCIE bank type. decode_smca_error()
 * indexes this table with the extended error code, so every slot is
 * spelled out with its index.
 */
static const char * const smca_pcie_mce_desc[] = {
	[0] = "CCIX PER Message logging",
	[1] = "CCIX Read Response with Status: Non-Data Error",
	[2] = "CCIX Write Response with Status: Non-Data Error",
	[3] = "CCIX Read Response with Status: Data Error",
	[4] = "CCIX Non-okay write response with data error",
};
|
||||
|
||||
struct smca_mce_desc {
|
||||
@ -299,11 +384,17 @@ static struct smca_mce_desc smca_mce_descs[] = {
|
||||
[SMCA_FP] = { smca_fp_mce_desc, ARRAY_SIZE(smca_fp_mce_desc) },
|
||||
[SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) },
|
||||
[SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) },
|
||||
[SMCA_CS_V2] = { smca_cs2_mce_desc, ARRAY_SIZE(smca_cs2_mce_desc) },
|
||||
[SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) },
|
||||
[SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) },
|
||||
[SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) },
|
||||
[SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) },
|
||||
[SMCA_PSP_V2] = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc) },
|
||||
[SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) },
|
||||
[SMCA_SMU_V2] = { smca_smu2_mce_desc, ARRAY_SIZE(smca_smu2_mce_desc) },
|
||||
[SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) },
|
||||
[SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc) },
|
||||
[SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc) },
|
||||
};
|
||||
|
||||
static bool f12h_mc0_mce(u16 ec, u8 xec)
|
||||
@ -874,13 +965,12 @@ static void decode_smca_error(struct mce *m)
|
||||
|
||||
ip_name = smca_get_long_name(bank_type);
|
||||
|
||||
pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);
|
||||
pr_emerg(HW_ERR "%s Ext. Error Code: %d", ip_name, xec);
|
||||
|
||||
/* Only print the decode of valid error codes */
|
||||
if (xec < smca_mce_descs[bank_type].num_descs &&
|
||||
(hwid->xec_bitmap & BIT_ULL(xec))) {
|
||||
pr_emerg(HW_ERR "%s Error: ", ip_name);
|
||||
pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]);
|
||||
pr_cont(", %s.\n", smca_mce_descs[bank_type].descs[xec]);
|
||||
}
|
||||
|
||||
if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc)
|
||||
@ -961,8 +1051,24 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
|
||||
((m->status & MCI_STATUS_UC) ? "UE" :
|
||||
(m->status & MCI_STATUS_DEFERRED) ? "-" : "CE"),
|
||||
((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
|
||||
((m->status & MCI_STATUS_PCC) ? "PCC" : "-"),
|
||||
((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"));
|
||||
((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"),
|
||||
((m->status & MCI_STATUS_PCC) ? "PCC" : "-"));
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_SMCA)) {
|
||||
u32 low, high;
|
||||
u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
|
||||
|
||||
if (!rdmsr_safe(addr, &low, &high) &&
|
||||
(low & MCI_CONFIG_MCAX))
|
||||
pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
|
||||
|
||||
pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-"));
|
||||
}
|
||||
|
||||
/* do the two bits[14:13] together */
|
||||
ecc = (m->status >> 45) & 0x3;
|
||||
if (ecc)
|
||||
pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
|
||||
|
||||
if (fam >= 0x15) {
|
||||
pr_cont("|%s", (m->status & MCI_STATUS_DEFERRED ? "Deferred" : "-"));
|
||||
@ -972,21 +1078,8 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
|
||||
pr_cont("|%s", (m->status & MCI_STATUS_POISON ? "Poison" : "-"));
|
||||
}
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_SMCA)) {
|
||||
u32 low, high;
|
||||
u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
|
||||
|
||||
pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-"));
|
||||
|
||||
if (!rdmsr_safe(addr, &low, &high) &&
|
||||
(low & MCI_CONFIG_MCAX))
|
||||
pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
|
||||
}
|
||||
|
||||
/* do the two bits[14:13] together */
|
||||
ecc = (m->status >> 45) & 0x3;
|
||||
if (ecc)
|
||||
pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
|
||||
if (fam >= 0x17)
|
||||
pr_cont("|%s", (m->status & MCI_STATUS_SCRUB ? "Scrub" : "-"));
|
||||
|
||||
pr_cont("]: 0x%016llx\n", m->status);
|
||||
|
||||
|
@ -14,7 +14,7 @@
|
||||
#define TRACE_INCLUDE_PATH ../../include/ras
|
||||
#include <ras/ras_event.h>
|
||||
|
||||
void log_non_standard_event(const uuid_le *sec_type, const uuid_le *fru_id,
|
||||
void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id,
|
||||
const char *fru_text, const u8 sev, const u8 *err,
|
||||
const u32 len)
|
||||
{
|
||||
|
@ -27,7 +27,7 @@
|
||||
TRACE_EVENT(extlog_mem_event,
|
||||
TP_PROTO(struct cper_sec_mem_err *mem,
|
||||
u32 err_seq,
|
||||
const uuid_le *fru_id,
|
||||
const guid_t *fru_id,
|
||||
const char *fru_text,
|
||||
u8 sev),
|
||||
|
||||
@ -39,7 +39,7 @@ TRACE_EVENT(extlog_mem_event,
|
||||
__field(u8, sev)
|
||||
__field(u64, pa)
|
||||
__field(u8, pa_mask_lsb)
|
||||
__field_struct(uuid_le, fru_id)
|
||||
__field_struct(guid_t, fru_id)
|
||||
__string(fru_text, fru_text)
|
||||
__field_struct(struct cper_mem_err_compact, data)
|
||||
),
|
||||
@ -218,8 +218,8 @@ TRACE_EVENT(arm_event,
|
||||
*/
|
||||
TRACE_EVENT(non_standard_event,
|
||||
|
||||
TP_PROTO(const uuid_le *sec_type,
|
||||
const uuid_le *fru_id,
|
||||
TP_PROTO(const guid_t *sec_type,
|
||||
const guid_t *fru_id,
|
||||
const char *fru_text,
|
||||
const u8 sev,
|
||||
const u8 *err,
|
||||
|
Loading…
Reference in New Issue
Block a user