Option to let the bios set per-bank CMCI thresholds so they can
filter noisy error sources at a fine grained level based on platform specific knowledge. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.11 (GNU/Linux) iQIcBAABAgAGBQJQZI6dAAoJEKurIx+X31iB2EQP/jnUqMWjmgwqpwFwag8ogrXq 92EU9phTnyQFCQULpIPSPlKgqU9WqPdPUq3c31ZOE6QKx7OhCfj6N5ZBGRgYhhGO zvBuj6cpGfmW448cVQnjhZ9uiIfPZPMXgGun+b1cYI4rDcjtBScNB6D5SuCPtWyx pisLOzvbd/Vnhd7XsLnFtKFSZ764vh2+yLhFLH6rOjUqyv2gHjAkqdJKor1ofzJ2 akwtOJcvm43MyxXOnJeENJ1tjx4DNp+/dl8dIEO/AvBy+zasfpkNNFn1nlMx7t9w H2KbSRUkmQ4sXZ7VTrIIYw79GtNTL8okYVy3G5k39ISIp4ZT8IYf4YwosuAUmpaL ivaKm/ifpd5yW661izIJuQOJqv/cZYfXJJpinGsTLvIMJyZpNcq3aC5k0Chyr51p L+gYJK6xIQQm/NA6Zsc6KPKcKHSoVhdsSnIRmOCdK8V/SpNbw7P3vVwKUzyATmEV f+2Dp24EsgmARmn9oKl7Xyj/jy8fcSovOs6bmG5oDi7aj0IiSIe894k+MiQY7c5W a5WWkn0KXzcVMPm19MFP9CLGYSFLLOKGbROzfRLbCgbvD1Ev+o9Ycg1tdMsBA3cX L8i6Vf1A9h7WgofX3LynC8hywQUUTJC5SZqlan0rY9JG8+VOR0V6XFVccX65Fxgl lv4KYq2zsleo3ntfBE0E =n9qI -----END PGP SIGNATURE----- Merge tag 'please-pull-naveen' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras into x86/mce Pull MCE updates from Tony Luck: "Option to let the bios set per-bank CMCI thresholds so they can filter noisy error sources at a fine grained level based on platform specific knowledge." Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
commit
39ba5010d3
@ -50,6 +50,13 @@ Machine check
|
|||||||
monarchtimeout:
|
monarchtimeout:
|
||||||
Sets the time in us to wait for other CPUs on machine checks. 0
|
Sets the time in us to wait for other CPUs on machine checks. 0
|
||||||
to disable.
|
to disable.
|
||||||
|
mce=bios_cmci_threshold
|
||||||
|
Don't overwrite the BIOS-set CMCI threshold. This boot option
|
||||||
|
prevents Linux from overwriting the CMCI threshold set by the
|
||||||
|
BIOS. Without this option, Linux always sets the CMCI
|
||||||
|
threshold to 1. Enabling this may make memory predictive failure
|
||||||
|
analysis less effective if the BIOS sets thresholds for memory
|
||||||
|
errors since we will not see details for all errors.
|
||||||
|
|
||||||
nomce (for compatibility with i386): same as mce=off
|
nomce (for compatibility with i386): same as mce=off
|
||||||
|
|
||||||
|
@ -161,6 +161,7 @@ DECLARE_PER_CPU(struct device *, mce_device);
|
|||||||
#ifdef CONFIG_X86_MCE_INTEL
|
#ifdef CONFIG_X86_MCE_INTEL
|
||||||
extern int mce_cmci_disabled;
|
extern int mce_cmci_disabled;
|
||||||
extern int mce_ignore_ce;
|
extern int mce_ignore_ce;
|
||||||
|
extern int mce_bios_cmci_threshold;
|
||||||
void mce_intel_feature_init(struct cpuinfo_x86 *c);
|
void mce_intel_feature_init(struct cpuinfo_x86 *c);
|
||||||
void cmci_clear(void);
|
void cmci_clear(void);
|
||||||
void cmci_reenable(void);
|
void cmci_reenable(void);
|
||||||
|
@ -83,6 +83,7 @@ static int mce_dont_log_ce __read_mostly;
|
|||||||
int mce_cmci_disabled __read_mostly;
|
int mce_cmci_disabled __read_mostly;
|
||||||
int mce_ignore_ce __read_mostly;
|
int mce_ignore_ce __read_mostly;
|
||||||
int mce_ser __read_mostly;
|
int mce_ser __read_mostly;
|
||||||
|
int mce_bios_cmci_threshold __read_mostly;
|
||||||
|
|
||||||
struct mce_bank *mce_banks __read_mostly;
|
struct mce_bank *mce_banks __read_mostly;
|
||||||
|
|
||||||
@ -1946,6 +1947,7 @@ static struct miscdevice mce_chrdev_device = {
|
|||||||
* check, or 0 to not wait
|
* check, or 0 to not wait
|
||||||
* mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
|
* mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
|
||||||
* mce=nobootlog Don't log MCEs from before booting.
|
* mce=nobootlog Don't log MCEs from before booting.
|
||||||
|
* mce=bios_cmci_threshold Don't overwrite a nonzero BIOS-set CMCI threshold
|
||||||
*/
|
*/
|
||||||
static int __init mcheck_enable(char *str)
|
static int __init mcheck_enable(char *str)
|
||||||
{
|
{
|
||||||
@ -1965,6 +1967,8 @@ static int __init mcheck_enable(char *str)
|
|||||||
mce_ignore_ce = 1;
|
mce_ignore_ce = 1;
|
||||||
else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
|
else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
|
||||||
mce_bootlog = (str[0] == 'b');
|
mce_bootlog = (str[0] == 'b');
|
||||||
|
else if (!strcmp(str, "bios_cmci_threshold"))
|
||||||
|
mce_bios_cmci_threshold = 1;
|
||||||
else if (isdigit(str[0])) {
|
else if (isdigit(str[0])) {
|
||||||
get_option(&str, &tolerant);
|
get_option(&str, &tolerant);
|
||||||
if (*str == ',') {
|
if (*str == ',') {
|
||||||
@ -2205,6 +2209,11 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = {
|
|||||||
&mce_cmci_disabled
|
&mce_cmci_disabled
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static struct dev_ext_attribute dev_attr_bios_cmci_threshold = {
|
||||||
|
__ATTR(bios_cmci_threshold, 0444, device_show_int, NULL),
|
||||||
|
&mce_bios_cmci_threshold
|
||||||
|
};
|
||||||
|
|
||||||
static struct device_attribute *mce_device_attrs[] = {
|
static struct device_attribute *mce_device_attrs[] = {
|
||||||
&dev_attr_tolerant.attr,
|
&dev_attr_tolerant.attr,
|
||||||
&dev_attr_check_interval.attr,
|
&dev_attr_check_interval.attr,
|
||||||
@ -2213,6 +2222,7 @@ static struct device_attribute *mce_device_attrs[] = {
|
|||||||
&dev_attr_dont_log_ce.attr,
|
&dev_attr_dont_log_ce.attr,
|
||||||
&dev_attr_ignore_ce.attr,
|
&dev_attr_ignore_ce.attr,
|
||||||
&dev_attr_cmci_disabled.attr,
|
&dev_attr_cmci_disabled.attr,
|
||||||
|
&dev_attr_bios_cmci_threshold.attr,
|
||||||
NULL
|
NULL
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -181,10 +181,12 @@ static void cmci_discover(int banks)
|
|||||||
unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
|
unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
int i;
|
int i;
|
||||||
|
int bios_wrong_thresh = 0;
|
||||||
|
|
||||||
raw_spin_lock_irqsave(&cmci_discover_lock, flags);
|
raw_spin_lock_irqsave(&cmci_discover_lock, flags);
|
||||||
for (i = 0; i < banks; i++) {
|
for (i = 0; i < banks; i++) {
|
||||||
u64 val;
|
u64 val;
|
||||||
|
int bios_zero_thresh = 0;
|
||||||
|
|
||||||
if (test_bit(i, owned))
|
if (test_bit(i, owned))
|
||||||
continue;
|
continue;
|
||||||
@ -198,8 +200,20 @@ static void cmci_discover(int banks)
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
|
if (!mce_bios_cmci_threshold) {
|
||||||
val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD;
|
val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
|
||||||
|
val |= CMCI_THRESHOLD;
|
||||||
|
} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
|
||||||
|
/*
|
||||||
|
* If bios_cmci_threshold boot option was specified
|
||||||
|
* but the threshold is zero, we'll try to initialize
|
||||||
|
* it to 1.
|
||||||
|
*/
|
||||||
|
bios_zero_thresh = 1;
|
||||||
|
val |= CMCI_THRESHOLD;
|
||||||
|
}
|
||||||
|
|
||||||
|
val |= MCI_CTL2_CMCI_EN;
|
||||||
wrmsrl(MSR_IA32_MCx_CTL2(i), val);
|
wrmsrl(MSR_IA32_MCx_CTL2(i), val);
|
||||||
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
|
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
|
||||||
|
|
||||||
@ -207,11 +221,26 @@ static void cmci_discover(int banks)
|
|||||||
if (val & MCI_CTL2_CMCI_EN) {
|
if (val & MCI_CTL2_CMCI_EN) {
|
||||||
set_bit(i, owned);
|
set_bit(i, owned);
|
||||||
__clear_bit(i, __get_cpu_var(mce_poll_banks));
|
__clear_bit(i, __get_cpu_var(mce_poll_banks));
|
||||||
|
/*
|
||||||
|
* We are able to set thresholds for some banks that
|
||||||
|
* had a threshold of 0. This means the BIOS has not
|
||||||
|
* set the thresholds properly or does not work with
|
||||||
|
* this boot option. Note down now and report later.
|
||||||
|
*/
|
||||||
|
if (mce_bios_cmci_threshold && bios_zero_thresh &&
|
||||||
|
(val & MCI_CTL2_CMCI_THRESHOLD_MASK))
|
||||||
|
bios_wrong_thresh = 1;
|
||||||
} else {
|
} else {
|
||||||
WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
|
WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
|
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
|
||||||
|
if (mce_bios_cmci_threshold && bios_wrong_thresh) {
|
||||||
|
pr_info_once(
|
||||||
|
"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
|
||||||
|
pr_info_once(
|
||||||
|
"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -249,7 +278,7 @@ void cmci_clear(void)
|
|||||||
continue;
|
continue;
|
||||||
/* Disable CMCI */
|
/* Disable CMCI */
|
||||||
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
|
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
|
||||||
val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK);
|
val &= ~MCI_CTL2_CMCI_EN;
|
||||||
wrmsrl(MSR_IA32_MCx_CTL2(i), val);
|
wrmsrl(MSR_IA32_MCx_CTL2(i), val);
|
||||||
__clear_bit(i, __get_cpu_var(mce_banks_owned));
|
__clear_bit(i, __get_cpu_var(mce_banks_owned));
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user