powerpc/pseries: Display machine check error details.
Extract the MCE error details from RTAS extended log and display it to console. With this patch you should now see mce logs like below: [ 142.371818] Severe Machine check interrupt [Recovered] [ 142.371822] NIP [d00000000ca301b8]: init_module+0x1b8/0x338 [bork_kernel] [ 142.371822] Initiator: CPU [ 142.371823] Error type: SLB [Multihit] [ 142.371824] Effective address: d00000000ca70000 Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
This commit is contained in:
		
							parent
							
								
									a43c159042
								
							
						
					
					
						commit
						8f0b80561f
					
				| @ -197,6 +197,11 @@ static inline uint8_t rtas_error_extended(const struct rtas_error_log *elog) | ||||
| 	return (elog->byte1 & 0x04) >> 2; | ||||
| } | ||||
| 
 | ||||
| static inline uint8_t rtas_error_initiator(const struct rtas_error_log *elog) | ||||
| { | ||||
| 	return (elog->byte2 & 0xf0) >> 4; | ||||
| } | ||||
| 
 | ||||
| #define rtas_error_type(x)	((x)->byte3) | ||||
| 
 | ||||
| static inline | ||||
|  | ||||
| @ -523,6 +523,136 @@ int pSeries_system_reset_exception(struct pt_regs *regs) | ||||
| 	return 0; /* need to perform reset */ | ||||
| } | ||||
| 
 | ||||
| #define VAL_TO_STRING(ar, val)	\ | ||||
| 	(((val) < ARRAY_SIZE(ar)) ? ar[(val)] : "Unknown") | ||||
| 
 | ||||
| static void pseries_print_mce_info(struct pt_regs *regs, | ||||
| 				   struct rtas_error_log *errp) | ||||
| { | ||||
| 	const char *level, *sevstr; | ||||
| 	struct pseries_errorlog *pseries_log; | ||||
| 	struct pseries_mc_errorlog *mce_log; | ||||
| 	u8 error_type, err_sub_type; | ||||
| 	u64 addr; | ||||
| 	u8 initiator = rtas_error_initiator(errp); | ||||
| 	int disposition = rtas_error_disposition(errp); | ||||
| 
 | ||||
| 	static const char * const initiators[] = { | ||||
| 		"Unknown", | ||||
| 		"CPU", | ||||
| 		"PCI", | ||||
| 		"ISA", | ||||
| 		"Memory", | ||||
| 		"Power Mgmt", | ||||
| 	}; | ||||
| 	static const char * const mc_err_types[] = { | ||||
| 		"UE", | ||||
| 		"SLB", | ||||
| 		"ERAT", | ||||
| 		"TLB", | ||||
| 		"D-Cache", | ||||
| 		"Unknown", | ||||
| 		"I-Cache", | ||||
| 	}; | ||||
| 	static const char * const mc_ue_types[] = { | ||||
| 		"Indeterminate", | ||||
| 		"Instruction fetch", | ||||
| 		"Page table walk ifetch", | ||||
| 		"Load/Store", | ||||
| 		"Page table walk Load/Store", | ||||
| 	}; | ||||
| 
 | ||||
| 	/* SLB sub errors valid values are 0x0, 0x1, 0x2 */ | ||||
| 	static const char * const mc_slb_types[] = { | ||||
| 		"Parity", | ||||
| 		"Multihit", | ||||
| 		"Indeterminate", | ||||
| 	}; | ||||
| 
 | ||||
| 	/* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */ | ||||
| 	static const char * const mc_soft_types[] = { | ||||
| 		"Unknown", | ||||
| 		"Parity", | ||||
| 		"Multihit", | ||||
| 		"Indeterminate", | ||||
| 	}; | ||||
| 
 | ||||
| 	if (!rtas_error_extended(errp)) { | ||||
| 		pr_err("Machine check interrupt: Missing extended error log\n"); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); | ||||
| 	if (pseries_log == NULL) | ||||
| 		return; | ||||
| 
 | ||||
| 	mce_log = (struct pseries_mc_errorlog *)pseries_log->data; | ||||
| 
 | ||||
| 	error_type = mce_log->error_type; | ||||
| 	err_sub_type = rtas_mc_error_sub_type(mce_log); | ||||
| 
 | ||||
| 	switch (rtas_error_severity(errp)) { | ||||
| 	case RTAS_SEVERITY_NO_ERROR: | ||||
| 		level = KERN_INFO; | ||||
| 		sevstr = "Harmless"; | ||||
| 		break; | ||||
| 	case RTAS_SEVERITY_WARNING: | ||||
| 		level = KERN_WARNING; | ||||
| 		sevstr = ""; | ||||
| 		break; | ||||
| 	case RTAS_SEVERITY_ERROR: | ||||
| 	case RTAS_SEVERITY_ERROR_SYNC: | ||||
| 		level = KERN_ERR; | ||||
| 		sevstr = "Severe"; | ||||
| 		break; | ||||
| 	case RTAS_SEVERITY_FATAL: | ||||
| 	default: | ||||
| 		level = KERN_ERR; | ||||
| 		sevstr = "Fatal"; | ||||
| 		break; | ||||
| 	} | ||||
| 
 | ||||
| 	printk("%s%s Machine check interrupt [%s]\n", level, sevstr, | ||||
| 	       disposition == RTAS_DISP_FULLY_RECOVERED ? | ||||
| 	       "Recovered" : "Not recovered"); | ||||
| 	if (user_mode(regs)) { | ||||
| 		printk("%s  NIP: [%016lx] PID: %d Comm: %s\n", level, | ||||
| 		       regs->nip, current->pid, current->comm); | ||||
| 	} else { | ||||
| 		printk("%s  NIP [%016lx]: %pS\n", level, regs->nip, | ||||
| 		       (void *)regs->nip); | ||||
| 	} | ||||
| 	printk("%s  Initiator: %s\n", level, | ||||
| 	       VAL_TO_STRING(initiators, initiator)); | ||||
| 
 | ||||
| 	switch (error_type) { | ||||
| 	case MC_ERROR_TYPE_UE: | ||||
| 		printk("%s  Error type: %s [%s]\n", level, | ||||
| 		       VAL_TO_STRING(mc_err_types, error_type), | ||||
| 		       VAL_TO_STRING(mc_ue_types, err_sub_type)); | ||||
| 		break; | ||||
| 	case MC_ERROR_TYPE_SLB: | ||||
| 		printk("%s  Error type: %s [%s]\n", level, | ||||
| 		       VAL_TO_STRING(mc_err_types, error_type), | ||||
| 		       VAL_TO_STRING(mc_slb_types, err_sub_type)); | ||||
| 		break; | ||||
| 	case MC_ERROR_TYPE_ERAT: | ||||
| 	case MC_ERROR_TYPE_TLB: | ||||
| 		printk("%s  Error type: %s [%s]\n", level, | ||||
| 		       VAL_TO_STRING(mc_err_types, error_type), | ||||
| 		       VAL_TO_STRING(mc_soft_types, err_sub_type)); | ||||
| 		break; | ||||
| 	default: | ||||
| 		printk("%s  Error type: %s\n", level, | ||||
| 		       VAL_TO_STRING(mc_err_types, error_type)); | ||||
| 		break; | ||||
| 	} | ||||
| 
 | ||||
| 	addr = rtas_mc_get_effective_addr(mce_log); | ||||
| 	if (addr) | ||||
| 		printk("%s    Effective address: %016llx\n", level, addr); | ||||
| } | ||||
| 
 | ||||
| static int mce_handle_error(struct rtas_error_log *errp) | ||||
| { | ||||
| 	struct pseries_errorlog *pseries_log; | ||||
| @ -585,8 +715,11 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err) | ||||
| 	int recovered = 0; | ||||
| 	int disposition = rtas_error_disposition(err); | ||||
| 
 | ||||
| 	pseries_print_mce_info(regs, err); | ||||
| 
 | ||||
| 	if (!(regs->msr & MSR_RI)) { | ||||
| 		/* If MSR_RI isn't set, we cannot recover */ | ||||
| 		pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n"); | ||||
| 		recovered = 0; | ||||
| 
 | ||||
| 	} else if (disposition == RTAS_DISP_FULLY_RECOVERED) { | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user