From 184e1fdfea066ab8f12a1e8912f402d2d6556d11 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 15 Jun 2009 15:37:07 +0800 Subject: [PATCH 01/21] x86, mce: fix a race condition about mce_callin and no_way_out If one CPU has no_way_out == 1, all other CPUs should have no_way_out == 1. But despite global_nwo is read after mce_callin, global_nwo is updated after mce_callin too. So it is possible that some CPU read global_nwo before some other CPU update global_nwo, so that no_way_out == 1 for some CPU, while no_way_out == 0 for some other CPU. This patch fixes this race condition via moving mce_callin updating after global_nwo updating, with a smp_wmb in between. A smp_rmb is added between their reading too. Signed-off-by: Huang Ying Acked-by: Andi Kleen Acked-by: Hidetoshi Seto --- arch/x86/kernel/cpu/mcheck/mce.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index fabba15e4558..19294b8524cb 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -703,6 +703,11 @@ static int mce_start(int no_way_out, int *order) } atomic_add(no_way_out, &global_nwo); + /* + * global_nwo should be updated before mce_callin + */ + smp_wmb(); + *order = atomic_add_return(1, &mce_callin); /* * Wait for everyone. @@ -716,6 +721,10 @@ static int mce_start(int no_way_out, int *order) ndelay(SPINUNIT); } + /* + * mce_callin should be read before global_nwo + */ + smp_rmb(); /* * Cache the global no_way_out state. */ @@ -862,7 +871,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) * Establish sequential order between the CPUs entering the machine * check handler. */ - int order; + int order = -1; /* * If no_way_out gets set, there is no safe way to recover from this @@ -887,7 +896,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) if (!banks) goto out; - order = atomic_add_return(1, &mce_callin); mce_setup(&m); m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); From 33edbf02a92771fa2a81e41084a44ba874e3a5a5 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 15 Jun 2009 17:18:45 +0900 Subject: [PATCH 02/21] x86, mce: don't init timer if !mce_available In mce_cpu_restart, mce_init_timer is called unconditionally. If !mce_available (e.g. mce is disabled), there are no useful work for timer. Stop running it. Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 19294b8524cb..dda77215e9e2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1617,8 +1617,9 @@ static int mce_resume(struct sys_device *dev) static void mce_cpu_restart(void *data) { del_timer_sync(&__get_cpu_var(mce_timer)); - if (mce_available(¤t_cpu_data)) - mce_init(); + if (!mce_available(¤t_cpu_data)) + return; + mce_init(); mce_init_timer(); } From 7fb06fc9672b947424e05871243a4c8e19ec3bce Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 15 Jun 2009 18:18:43 +0900 Subject: [PATCH 03/21] x86, mce: cleanup mce_start() Simplify interface of mce_start(): - no_way_out = mce_start(no_way_out, &order); + order = mce_start(&no_way_out); Now Monarch and Subjects share same exit(return) in usual path. Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 70 +++++++++++++++----------------- 1 file changed, 33 insertions(+), 37 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index dda77215e9e2..739fd7eca0a4 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -691,23 +691,21 @@ static atomic_t global_nwo; * in the entry order. * TBD double check parallel CPU hotunplug */ -static int mce_start(int no_way_out, int *order) +static int mce_start(int *no_way_out) { - int nwo; + int order; int cpus = num_online_cpus(); u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; - if (!timeout) { - *order = -1; - return no_way_out; - } + if (!timeout) + return -1; - atomic_add(no_way_out, &global_nwo); + atomic_add(*no_way_out, &global_nwo); /* * global_nwo should be updated before mce_callin */ smp_wmb(); - *order = atomic_add_return(1, &mce_callin); + order = atomic_add_return(1, &mce_callin); /* * Wait for everyone. @@ -715,8 +713,7 @@ static int mce_start(int no_way_out, int *order) while (atomic_read(&mce_callin) != cpus) { if (mce_timed_out(&timeout)) { atomic_set(&global_nwo, 0); - *order = -1; - return no_way_out; + return -1; } ndelay(SPINUNIT); } @@ -725,34 +722,34 @@ static int mce_start(int no_way_out, int *order) * mce_callin should be read before global_nwo */ smp_rmb(); + + if (order == 1) { + /* + * Monarch: Starts executing now, the others wait. + */ + atomic_set(&mce_executing, 1); + } else { + /* + * Subject: Now start the scanning loop one by one in + * the original callin order. + * This way when there are any shared banks it will be + * only seen by one CPU before cleared, avoiding duplicates. + */ + while (atomic_read(&mce_executing) < order) { + if (mce_timed_out(&timeout)) { + atomic_set(&global_nwo, 0); + return -1; + } + ndelay(SPINUNIT); + } + } + /* * Cache the global no_way_out state. */ - nwo = atomic_read(&global_nwo); + *no_way_out = atomic_read(&global_nwo); - /* - * Monarch starts executing now, the others wait. - */ - if (*order == 1) { - atomic_set(&mce_executing, 1); - return nwo; - } - - /* - * Now start the scanning loop one by one - * in the original callin order. - * This way when there are any shared banks it will - * be only seen by one CPU before cleared, avoiding duplicates. - */ - while (atomic_read(&mce_executing) < *order) { - if (mce_timed_out(&timeout)) { - atomic_set(&global_nwo, 0); - *order = -1; - return no_way_out; - } - ndelay(SPINUNIT); - } - return nwo; + return order; } /* @@ -871,8 +868,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) * Establish sequential order between the CPUs entering the machine * check handler. */ - int order = -1; - + int order; /* * If no_way_out gets set, there is no safe way to recover from this * MCE. If tolerant is cranked up, we'll try anyway. @@ -917,7 +913,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) * This way we don't report duplicated events on shared banks * because the first one to see it will clear it. */ - no_way_out = mce_start(no_way_out, &order); + order = mce_start(&no_way_out); for (i = 0; i < banks; i++) { __clear_bit(i, toclear); if (!bank[i]) From 4e5b3e690dda890523e93af9c545261f5916a3a6 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 15 Jun 2009 17:20:20 +0900 Subject: [PATCH 04/21] x86, mce: add __read_mostly Add __read_mostly to data written during setup. Suggested-by: Ingo Molnar Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 739fd7eca0a4..2d2e0b565a9c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -57,7 +57,7 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code) void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; -int mce_disabled; +int mce_disabled __read_mostly; #ifdef CONFIG_X86_NEW_MCE @@ -76,19 +76,19 @@ DEFINE_PER_CPU(unsigned, mce_exception_count); * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors * 3: never panic or SIGBUS, log all errors (for testing only) */ -static int tolerant = 1; -static int banks; -static u64 *bank; -static unsigned long notify_user; -static int rip_msr; -static int mce_bootlog = -1; -static int monarch_timeout = -1; -static int mce_panic_timeout; -static int mce_dont_log_ce; -int mce_cmci_disabled; -int mce_ignore_ce; -int mce_ser; +static int tolerant __read_mostly = 1; +static int banks __read_mostly; +static u64 *bank __read_mostly; +static int rip_msr __read_mostly; +static int mce_bootlog __read_mostly = -1; +static int monarch_timeout __read_mostly = -1; +static int mce_panic_timeout __read_mostly; +static int mce_dont_log_ce __read_mostly; +int mce_cmci_disabled __read_mostly; +int mce_ignore_ce __read_mostly; +int mce_ser __read_mostly; +static unsigned long notify_user; static char trigger[128]; static char *trigger_argv[2] = { trigger, NULL }; From 1020bcbcc7da36001d9226c5d57e999949cb80c5 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 15 Jun 2009 17:20:57 +0900 Subject: [PATCH 05/21] x86, mce: rename static variables around trigger "trigger" is not straight forward name for valiable that holds name of user mode helper program which triggered by machine check events. This patch renames this valiable and kins to more recognizable names. trigger => mce_helper trigger_argv => mce_helper_argv notify_user => mce_need_notify No functional changes. Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 2d2e0b565a9c..e8b893e880ed 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -88,9 +88,10 @@ int mce_cmci_disabled __read_mostly; int mce_ignore_ce __read_mostly; int mce_ser __read_mostly; -static unsigned long notify_user; -static char trigger[128]; -static char *trigger_argv[2] = { trigger, NULL }; +/* User mode helper program triggered by machine check event */ +static unsigned long mce_need_notify; +static char mce_helper[128]; +static char *mce_helper_argv[2] = { mce_helper, NULL }; static unsigned long dont_init_banks; @@ -180,7 +181,7 @@ void mce_log(struct mce *mce) wmb(); mce->finished = 1; - set_bit(0, ¬ify_user); + set_bit(0, &mce_need_notify); } static void print_mce(struct mce *m) @@ -1122,7 +1123,7 @@ static void mcheck_timer(unsigned long data) static void mce_do_trigger(struct work_struct *work) { - call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); + call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT); } static DECLARE_WORK(mce_trigger_work, mce_do_trigger); @@ -1139,7 +1140,7 @@ int mce_notify_irq(void) clear_thread_flag(TIF_MCE_NOTIFY); - if (test_and_clear_bit(0, ¬ify_user)) { + if (test_and_clear_bit(0, &mce_need_notify)) { wake_up_interruptible(&mce_wait); /* @@ -1147,7 +1148,7 @@ int mce_notify_irq(void) * work_pending is always cleared before the function is * executed. */ - if (trigger[0] && !work_pending(&mce_trigger_work)) + if (mce_helper[0] && !work_pending(&mce_trigger_work)) schedule_work(&mce_trigger_work); if (__ratelimit(&ratelimit)) @@ -1664,9 +1665,9 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) { - strcpy(buf, trigger); + strcpy(buf, mce_helper); strcat(buf, "\n"); - return strlen(trigger) + 1; + return strlen(mce_helper) + 1; } static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, @@ -1675,10 +1676,10 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *p; int len; - strncpy(trigger, buf, sizeof(trigger)); - trigger[sizeof(trigger)-1] = 0; - len = strlen(trigger); - p = strchr(trigger, '\n'); + strncpy(mce_helper, buf, sizeof(mce_helper)); + mce_helper[sizeof(mce_helper)-1] = 0; + len = strlen(mce_helper); + p = strchr(mce_helper, '\n'); if (*p) *p = 0; From 9af43b54ab4509f1dac49637d6917d57292e6518 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 15 Jun 2009 17:21:36 +0900 Subject: [PATCH 06/21] x86, mce: sysfs entries for new mce options Add sysfs interface for admins who want to tweak these options without rebooting the system. Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 84 +++++++++++++++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index e8b893e880ed..4b62443b6e72 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1626,6 +1626,26 @@ static void mce_restart(void) on_each_cpu(mce_cpu_restart, NULL, 1); } +/* Toggle features for corrected errors */ +static void mce_disable_ce(void *all) +{ + if (!mce_available(¤t_cpu_data)) + return; + if (all) + del_timer_sync(&__get_cpu_var(mce_timer)); + cmci_clear(); +} + +static void mce_enable_ce(void *all) +{ + if (!mce_available(¤t_cpu_data)) + return; + cmci_reenable(); + cmci_recheck(); + if (all) + mce_init_timer(); +} + static struct sysdev_class mce_sysclass = { .suspend = mce_suspend, .shutdown = mce_shutdown, @@ -1687,6 +1707,52 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, return len; } +static ssize_t set_ignore_ce(struct sys_device *s, + struct sysdev_attribute *attr, + const char *buf, size_t size) +{ + u64 new; + + if (strict_strtoull(buf, 0, &new) < 0) + return -EINVAL; + + if (mce_ignore_ce ^ !!new) { + if (new) { + /* disable ce features */ + on_each_cpu(mce_disable_ce, (void *)1, 1); + mce_ignore_ce = 1; + } else { + /* enable ce features */ + mce_ignore_ce = 0; + on_each_cpu(mce_enable_ce, (void *)1, 1); + } + } + return size; +} + +static ssize_t set_cmci_disabled(struct sys_device *s, + struct sysdev_attribute *attr, + const char *buf, size_t size) +{ + u64 new; + + if (strict_strtoull(buf, 0, &new) < 0) + return -EINVAL; + + if (mce_cmci_disabled ^ !!new) { + if (new) { + /* disable cmci */ + on_each_cpu(mce_disable_ce, NULL, 1); + mce_cmci_disabled = 1; + } else { + /* enable cmci */ + mce_cmci_disabled = 0; + on_each_cpu(mce_enable_ce, NULL, 1); + } + } + return size; +} + static ssize_t store_int_with_restart(struct sys_device *s, struct sysdev_attribute *attr, const char *buf, size_t size) @@ -1699,6 +1765,7 @@ static ssize_t store_int_with_restart(struct sys_device *s, static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout); +static SYSDEV_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce); static struct sysdev_ext_attribute attr_check_interval = { _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int, @@ -1706,9 +1773,24 @@ static struct sysdev_ext_attribute attr_check_interval = { &check_interval }; +static struct sysdev_ext_attribute attr_ignore_ce = { + _SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_ignore_ce), + &mce_ignore_ce +}; + +static struct sysdev_ext_attribute attr_cmci_disabled = { + _SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_cmci_disabled), + &mce_cmci_disabled +}; + static struct sysdev_attribute *mce_attrs[] = { - &attr_tolerant.attr, &attr_check_interval.attr, &attr_trigger, + &attr_tolerant.attr, + &attr_check_interval.attr, + &attr_trigger, &attr_monarch_timeout.attr, + &attr_dont_log_ce.attr, + &attr_ignore_ce.attr, + &attr_cmci_disabled.attr, NULL }; From 9e55e44e39798541ba39d57f4b569deb555ae1ce Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 15 Jun 2009 17:22:15 +0900 Subject: [PATCH 07/21] x86, mce: unify mce.h There are 2 headers: arch/x86/include/asm/mce.h arch/x86/kernel/cpu/mcheck/mce.h and in the latter small header: #include This patch move all contents in the latter header into the former, and fix all files using the latter to include the former instead. Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 36 +++++++++++++++++++-- arch/x86/kernel/cpu/mcheck/k7.c | 3 +- arch/x86/kernel/cpu/mcheck/mce.c | 1 - arch/x86/kernel/cpu/mcheck/mce.h | 38 ----------------------- arch/x86/kernel/cpu/mcheck/mce_intel.c | 3 +- arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 2 -- arch/x86/kernel/cpu/mcheck/non-fatal.c | 3 +- arch/x86/kernel/cpu/mcheck/p4.c | 3 +- arch/x86/kernel/cpu/mcheck/p5.c | 3 +- arch/x86/kernel/cpu/mcheck/p6.c | 3 +- arch/x86/kernel/cpu/mcheck/winchip.c | 3 +- arch/x86/kernel/traps.c | 3 +- 12 files changed, 42 insertions(+), 59 deletions(-) delete mode 100644 arch/x86/kernel/cpu/mcheck/mce.h diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 540a466e50f5..aae6fe2112f9 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -102,10 +102,42 @@ struct mce_log { #ifdef __KERNEL__ +#include +#include +#include + extern int mce_disabled; -#include -#include +#ifdef CONFIG_X86_OLD_MCE +void amd_mcheck_init(struct cpuinfo_x86 *c); +void intel_p4_mcheck_init(struct cpuinfo_x86 *c); +void intel_p6_mcheck_init(struct cpuinfo_x86 *c); +#endif + +#ifdef CONFIG_X86_ANCIENT_MCE +void intel_p5_mcheck_init(struct cpuinfo_x86 *c); +void winchip_mcheck_init(struct cpuinfo_x86 *c); +extern int mce_p5_enable; +static inline int mce_p5_enabled(void) { return mce_p5_enable; } +static inline void enable_p5_mce(void) { mce_p5_enable = 1; } +#else +static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} +static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} +static inline int mce_p5_enabled(void) { return 0; } +static inline void enable_p5_mce(void) { } +#endif + +/* Call the installed machine check handler for this CPU setup. */ +extern void (*machine_check_vector)(struct pt_regs *, long error_code); + +#ifdef CONFIG_X86_OLD_MCE +extern int nr_mce_banks; +extern void intel_set_thermal_handler(void); +#else +static inline void intel_set_thermal_handler(void) { } +#endif + +void intel_init_thermal(struct cpuinfo_x86 *c); void mce_setup(struct mce *m); void mce_log(struct mce *m); diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c index 89e510424152..b945d5dbc609 100644 --- a/arch/x86/kernel/cpu/mcheck/k7.c +++ b/arch/x86/kernel/cpu/mcheck/k7.c @@ -10,10 +10,9 @@ #include #include +#include #include -#include "mce.h" - /* Machine Check Handler For AMD Athlon/Duron: */ static void k7_machine_check(struct pt_regs *regs, long error_code) { diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 4b62443b6e72..faedd776847d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -44,7 +44,6 @@ #include #include "mce-internal.h" -#include "mce.h" /* Handle unconfigured int18 (should never happen) */ static void unexpected_machine_check(struct pt_regs *regs, long error_code) diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h deleted file mode 100644 index 84a552b458c8..000000000000 --- a/arch/x86/kernel/cpu/mcheck/mce.h +++ /dev/null @@ -1,38 +0,0 @@ -#include -#include - -#ifdef CONFIG_X86_OLD_MCE -void amd_mcheck_init(struct cpuinfo_x86 *c); -void intel_p4_mcheck_init(struct cpuinfo_x86 *c); -void intel_p6_mcheck_init(struct cpuinfo_x86 *c); -#endif - -#ifdef CONFIG_X86_ANCIENT_MCE -void intel_p5_mcheck_init(struct cpuinfo_x86 *c); -void winchip_mcheck_init(struct cpuinfo_x86 *c); -extern int mce_p5_enable; -static inline int mce_p5_enabled(void) { return mce_p5_enable; } -static inline void enable_p5_mce(void) { mce_p5_enable = 1; } -#else -static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} -static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} -static inline int mce_p5_enabled(void) { return 0; } -static inline void enable_p5_mce(void) { } -#endif - -/* Call the installed machine check handler for this CPU setup. */ -extern void (*machine_check_vector)(struct pt_regs *, long error_code); - -#ifdef CONFIG_X86_OLD_MCE - -extern int nr_mce_banks; - -void intel_set_thermal_handler(void); - -#else - -static inline void intel_set_thermal_handler(void) { } - -#endif - -void intel_init_thermal(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 2b011d2d8579..475478b20884 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -11,10 +11,9 @@ #include #include #include +#include #include -#include "mce.h" - void intel_init_thermal(struct cpuinfo_x86 *c) { unsigned int cpu = smp_processor_id(); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index f2ef6952c400..c548111d011b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -16,8 +16,6 @@ #include #include -#include "mce.h" - asmlinkage void smp_thermal_interrupt(void) { __u64 msr_val; diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c index 70b710420f74..f5f2d6f71fb6 100644 --- a/arch/x86/kernel/cpu/mcheck/non-fatal.c +++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c @@ -17,10 +17,9 @@ #include #include +#include #include -#include "mce.h" - static int firstbank; #define MCE_RATE (15*HZ) /* timer rate is 15s */ diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index 82cee108a2d3..8d3e40edd64d 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -12,10 +12,9 @@ #include #include #include +#include #include -#include "mce.h" - /* as supported by the P4/Xeon family */ struct intel_mce_extended_msrs { u32 eax; diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 015f481ab1b0..747853f9188d 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -10,10 +10,9 @@ #include #include +#include #include -#include "mce.h" - /* By default disabled */ int mce_p5_enable; diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c index 43c24e667457..01e4f8178183 100644 --- a/arch/x86/kernel/cpu/mcheck/p6.c +++ b/arch/x86/kernel/cpu/mcheck/p6.c @@ -10,10 +10,9 @@ #include #include +#include #include -#include "mce.h" - /* Machine Check Handler For PII/PIII */ static void intel_machine_check(struct pt_regs *regs, long error_code) { diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 81b02487090b..54060f565974 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c @@ -9,10 +9,9 @@ #include #include +#include #include -#include "mce.h" - /* Machine check handler for WinChip C6: */ static void winchip_machine_check(struct pt_regs *regs, long error_code) { diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 1e1e27b7d438..71f9c74814d8 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -53,6 +53,7 @@ #include #include #include +#include #include @@ -64,8 +65,6 @@ #include #include -#include "cpu/mcheck/mce.h" - asmlinkage int system_call(void); /* Do we ignore FPU interrupts ? */ From c697836985e18d9c34897428ba563b13044a6dcd Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 15 Jun 2009 17:22:49 +0900 Subject: [PATCH 08/21] x86, mce: make mce_disabled boolean The mce_disabled on 32bit is a tristate variable [1,0,-1], while 64bit version is boolean [0,1]. This patch makes mce_disabled always boolean, and use mce_p5_enabled to indicate the third state instead. Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 8 +++----- arch/x86/kernel/cpu/mcheck/mce.c | 8 +++----- arch/x86/kernel/cpu/mcheck/p5.c | 12 +++++------- 3 files changed, 11 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index aae6fe2112f9..6568cdedcd89 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -107,6 +107,7 @@ struct mce_log { #include extern int mce_disabled; +extern int mce_p5_enabled; #ifdef CONFIG_X86_OLD_MCE void amd_mcheck_init(struct cpuinfo_x86 *c); @@ -117,14 +118,11 @@ void intel_p6_mcheck_init(struct cpuinfo_x86 *c); #ifdef CONFIG_X86_ANCIENT_MCE void intel_p5_mcheck_init(struct cpuinfo_x86 *c); void winchip_mcheck_init(struct cpuinfo_x86 *c); -extern int mce_p5_enable; -static inline int mce_p5_enabled(void) { return mce_p5_enable; } -static inline void enable_p5_mce(void) { mce_p5_enable = 1; } +static inline void enable_p5_mce(void) { mce_p5_enabled = 1; } #else static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} -static inline int mce_p5_enabled(void) { return 0; } -static inline void enable_p5_mce(void) { } +static inline void enable_p5_mce(void) {} #endif /* Call the installed machine check handler for this CPU setup. */ diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index faedd776847d..6095e0296abd 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1286,8 +1286,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) return; switch (c->x86_vendor) { case X86_VENDOR_INTEL: - if (mce_p5_enabled()) - intel_p5_mcheck_init(c); + intel_p5_mcheck_init(c); break; case X86_VENDOR_CENTAUR: winchip_mcheck_init(c); @@ -2002,7 +2001,7 @@ EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ /* This has to be run for each processor */ void mcheck_init(struct cpuinfo_x86 *c) { - if (mce_disabled == 1) + if (mce_disabled) return; switch (c->x86_vendor) { @@ -2032,10 +2031,9 @@ void mcheck_init(struct cpuinfo_x86 *c) static int __init mcheck_enable(char *str) { - mce_disabled = -1; + mce_p5_enabled = 1; return 1; } - __setup("mce", mcheck_enable); #endif /* CONFIG_X86_OLD_MCE */ diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 747853f9188d..5c0e6533d9bc 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -14,7 +14,7 @@ #include /* By default disabled */ -int mce_p5_enable; +int mce_p5_enabled __read_mostly; /* Machine check handler for Pentium class Intel CPUs: */ static void pentium_machine_check(struct pt_regs *regs, long error_code) @@ -42,16 +42,14 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; + /* Default P5 to off as its often misconnected: */ + if (!mce_p5_enabled) + return; + /* Check for MCE support: */ if (!cpu_has(c, X86_FEATURE_MCE)) return; -#ifdef CONFIG_X86_OLD_MCE - /* Default P5 to off as its often misconnected: */ - if (mce_disabled != -1) - return; -#endif - machine_check_vector = pentium_machine_check; /* Make sure the vector pointer is visible before we enable MCEs: */ wmb(); From 3adacb70d32046ccc9f0333b50bb2ba1582ccdf4 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 15 Jun 2009 17:23:28 +0900 Subject: [PATCH 09/21] x86, mce: unify smp_thermal_interrupt, prepare p4 Remove unused argument regs from handlers, and use inc_irq_stat. Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/p4.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index 8d3e40edd64d..ddeed6e295af 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -35,7 +35,7 @@ static int mce_num_extended_msrs; #ifdef CONFIG_X86_MCE_P4THERMAL -static void unexpected_thermal_interrupt(struct pt_regs *regs) +static void unexpected_thermal_interrupt(void) { printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", smp_processor_id()); @@ -43,7 +43,7 @@ static void unexpected_thermal_interrupt(struct pt_regs *regs) } /* P4/Xeon Thermal transition interrupt handler: */ -static void intel_thermal_interrupt(struct pt_regs *regs) +static void intel_thermal_interrupt(void) { __u64 msr_val; @@ -54,14 +54,13 @@ static void intel_thermal_interrupt(struct pt_regs *regs) } /* Thermal interrupt handler for this CPU setup: */ -static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = - unexpected_thermal_interrupt; +static void (*vendor_thermal_interrupt)(void) = unexpected_thermal_interrupt; void smp_thermal_interrupt(struct pt_regs *regs) { irq_enter(); - vendor_thermal_interrupt(regs); - __get_cpu_var(irq_stat).irq_thermal_count++; + vendor_thermal_interrupt(); + inc_irq_stat(irq_thermal_count); irq_exit(); } From 5335612a574a45beab14193ec641ed2f45e7a523 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 15 Jun 2009 17:24:09 +0900 Subject: [PATCH 10/21] x86, mce: unify smp_thermal_interrupt, prepare mce_intel_64 Break smp_thermal_interrupt() into two functions. Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index c548111d011b..a5232b2c4ca0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -16,19 +16,21 @@ #include #include -asmlinkage void smp_thermal_interrupt(void) +static void intel_thermal_interrupt(void) { __u64 msr_val; - ack_APIC_irq(); - - exit_idle(); - irq_enter(); - rdmsrl(MSR_IA32_THERM_STATUS, msr_val); if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT)) mce_log_therm_throt_event(msr_val); +} +asmlinkage void smp_thermal_interrupt(void) +{ + ack_APIC_irq(); + exit_idle(); + irq_enter(); + intel_thermal_interrupt(); inc_irq_stat(irq_thermal_count); irq_exit(); } From e8ce2c5ee826b3787202493effcd08d4b1e1e639 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 15 Jun 2009 17:24:40 +0900 Subject: [PATCH 11/21] x86, mce: unify smp_thermal_interrupt, prepare Let them in same shape. Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 25 +++++++++++++---------- arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 20 ++++++++++++++++-- arch/x86/kernel/cpu/mcheck/p4.c | 10 +++++---- 3 files changed, 38 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 6568cdedcd89..3bc827c0f409 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -110,6 +110,7 @@ extern int mce_disabled; extern int mce_p5_enabled; #ifdef CONFIG_X86_OLD_MCE +extern int nr_mce_banks; void amd_mcheck_init(struct cpuinfo_x86 *c); void intel_p4_mcheck_init(struct cpuinfo_x86 *c); void intel_p6_mcheck_init(struct cpuinfo_x86 *c); @@ -128,15 +129,6 @@ static inline void enable_p5_mce(void) {} /* Call the installed machine check handler for this CPU setup. */ extern void (*machine_check_vector)(struct pt_regs *, long error_code); -#ifdef CONFIG_X86_OLD_MCE -extern int nr_mce_banks; -extern void intel_set_thermal_handler(void); -#else -static inline void intel_set_thermal_handler(void) { } -#endif - -void intel_init_thermal(struct cpuinfo_x86 *c); - void mce_setup(struct mce *m); void mce_log(struct mce *m); DECLARE_PER_CPU(struct sys_device, mce_dev); @@ -175,8 +167,6 @@ int mce_available(struct cpuinfo_x86 *c); DECLARE_PER_CPU(unsigned, mce_exception_count); DECLARE_PER_CPU(unsigned, mce_poll_count); -void mce_log_therm_throt_event(__u64 status); - extern atomic_t mce_entry; void do_machine_check(struct pt_regs *, long); @@ -205,5 +195,18 @@ void mcheck_init(struct cpuinfo_x86 *c); extern void (*mce_threshold_vector)(void); +/* + * Thermal handler + */ + +void intel_set_thermal_handler(void); +void intel_init_thermal(struct cpuinfo_x86 *c); + +#ifdef CONFIG_X86_NEW_MCE +void mce_log_therm_throt_event(__u64 status); +#else +static inline void mce_log_therm_throt_event(__u64 status) {} +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index a5232b2c4ca0..922e3a482f6f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -16,6 +16,14 @@ #include #include +static void unexpected_thermal_interrupt(void) +{ + printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", + smp_processor_id()); + add_taint(TAINT_MACHINE_CHECK); +} + +/* P4/Xeon Thermal transition interrupt handler: */ static void intel_thermal_interrupt(void) { __u64 msr_val; @@ -25,14 +33,22 @@ static void intel_thermal_interrupt(void) mce_log_therm_throt_event(msr_val); } +/* Thermal interrupt handler for this CPU setup: */ +static void (*vendor_thermal_interrupt)(void) = unexpected_thermal_interrupt; + asmlinkage void smp_thermal_interrupt(void) { - ack_APIC_irq(); exit_idle(); irq_enter(); - intel_thermal_interrupt(); inc_irq_stat(irq_thermal_count); + intel_thermal_interrupt(); irq_exit(); + ack_APIC_irq(); +} + +void intel_set_thermal_handler(void) +{ + vendor_thermal_interrupt = intel_thermal_interrupt; } /* diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index ddeed6e295af..75313f5b624e 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -47,10 +48,9 @@ static void intel_thermal_interrupt(void) { __u64 msr_val; - ack_APIC_irq(); - rdmsrl(MSR_IA32_THERM_STATUS, msr_val); - therm_throt_process(msr_val & THERM_STATUS_PROCHOT); + if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT)) + mce_log_therm_throt_event(msr_val); } /* Thermal interrupt handler for this CPU setup: */ @@ -58,10 +58,12 @@ static void (*vendor_thermal_interrupt)(void) = unexpected_thermal_interrupt; void smp_thermal_interrupt(struct pt_regs *regs) { + exit_idle(); irq_enter(); - vendor_thermal_interrupt(); inc_irq_stat(irq_thermal_count); + vendor_thermal_interrupt(); irq_exit(); + ack_APIC_irq(); } void intel_set_thermal_handler(void) From a65c88dd2c83b569dbd13778da689861bdf977f2 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 15 Jun 2009 17:25:27 +0900 Subject: [PATCH 12/21] x86, mce: unify smp_thermal_interrupt Put common functions into therm_throt.c, modify Makefile. unexpected_thermal_interrupt intel_thermal_interrupt smp_thermal_interrupt intel_set_thermal_handler Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/Makefile | 7 ++-- arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 38 ------------------- arch/x86/kernel/cpu/mcheck/p4.c | 45 ----------------------- arch/x86/kernel/cpu/mcheck/therm_throt.c | 40 +++++++++++++++++++- 4 files changed, 43 insertions(+), 87 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index 45004faf67ea..53df57d11c50 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile @@ -1,11 +1,12 @@ -obj-y = mce.o therm_throt.o +obj-y = mce.o obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o -obj-$(CONFIG_X86_MCE_P4THERMAL) += mce_intel.o -obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o mce_intel.o +obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o + +obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o mce_intel.o diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index 922e3a482f6f..3b7a0572e2fe 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -9,48 +9,10 @@ #include #include #include -#include #include #include -#include -#include #include -static void unexpected_thermal_interrupt(void) -{ - printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", - smp_processor_id()); - add_taint(TAINT_MACHINE_CHECK); -} - -/* P4/Xeon Thermal transition interrupt handler: */ -static void intel_thermal_interrupt(void) -{ - __u64 msr_val; - - rdmsrl(MSR_IA32_THERM_STATUS, msr_val); - if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT)) - mce_log_therm_throt_event(msr_val); -} - -/* Thermal interrupt handler for this CPU setup: */ -static void (*vendor_thermal_interrupt)(void) = unexpected_thermal_interrupt; - -asmlinkage void smp_thermal_interrupt(void) -{ - exit_idle(); - irq_enter(); - inc_irq_stat(irq_thermal_count); - intel_thermal_interrupt(); - irq_exit(); - ack_APIC_irq(); -} - -void intel_set_thermal_handler(void) -{ - vendor_thermal_interrupt = intel_thermal_interrupt; -} - /* * Support for Intel Correct Machine Check Interrupts. This allows * the CPU to raise an interrupt when a corrected machine check happened. diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index 75313f5b624e..76c5f0305f9f 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -1,8 +1,6 @@ /* * P4 specific Machine Check Exception Reporting */ - -#include #include #include #include @@ -10,9 +8,6 @@ #include #include -#include -#include -#include #include #include @@ -33,46 +28,6 @@ struct intel_mce_extended_msrs { static int mce_num_extended_msrs; - -#ifdef CONFIG_X86_MCE_P4THERMAL - -static void unexpected_thermal_interrupt(void) -{ - printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", - smp_processor_id()); - add_taint(TAINT_MACHINE_CHECK); -} - -/* P4/Xeon Thermal transition interrupt handler: */ -static void intel_thermal_interrupt(void) -{ - __u64 msr_val; - - rdmsrl(MSR_IA32_THERM_STATUS, msr_val); - if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT)) - mce_log_therm_throt_event(msr_val); -} - -/* Thermal interrupt handler for this CPU setup: */ -static void (*vendor_thermal_interrupt)(void) = unexpected_thermal_interrupt; - -void smp_thermal_interrupt(struct pt_regs *regs) -{ - exit_idle(); - irq_enter(); - inc_irq_stat(irq_thermal_count); - vendor_thermal_interrupt(); - irq_exit(); - ack_APIC_irq(); -} - -void intel_set_thermal_handler(void) -{ - vendor_thermal_interrupt = intel_thermal_interrupt; -} - -#endif /* CONFIG_X86_MCE_P4THERMAL */ - /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) { diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 7b1ae2e20ba5..b3792b196856 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -13,6 +13,7 @@ * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. * Inspired by Ross Biro's and Al Borchers' counter code. */ +#include #include #include #include @@ -20,6 +21,8 @@ #include #include +#include +#include /* How long to wait between reporting thermal events */ #define CHECK_INTERVAL (300 * HZ) @@ -186,6 +189,41 @@ static __init int thermal_throttle_init_device(void) return 0; } - device_initcall(thermal_throttle_init_device); + #endif /* CONFIG_SYSFS */ + +/* Thermal transition interrupt handler */ +void intel_thermal_interrupt(void) +{ + __u64 msr_val; + + rdmsrl(MSR_IA32_THERM_STATUS, msr_val); + if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT)) + mce_log_therm_throt_event(msr_val); +} + +static void unexpected_thermal_interrupt(void) +{ + printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", + smp_processor_id()); + add_taint(TAINT_MACHINE_CHECK); +} + +static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; + +asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) +{ + exit_idle(); + irq_enter(); + inc_irq_stat(irq_thermal_count); + smp_thermal_vector(); + irq_exit(); + /* Ack only at the end to avoid potential reentry */ + ack_APIC_irq(); +} + +void intel_set_thermal_handler(void) +{ + smp_thermal_vector = intel_thermal_interrupt; +} From 895287c0a6aa571160c47ee10de11b542166c4f9 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 15 Jun 2009 17:26:10 +0900 Subject: [PATCH 13/21] x86, mce: squash mce_intel.c into therm_throt.c move intel_init_thermal() into therm_throt.c Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/Makefile | 2 +- arch/x86/kernel/cpu/mcheck/mce_intel.c | 73 ------------------------ arch/x86/kernel/cpu/mcheck/therm_throt.c | 66 +++++++++++++++++++++ 3 files changed, 67 insertions(+), 74 deletions(-) delete mode 100644 arch/x86/kernel/cpu/mcheck/mce_intel.c diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index 53df57d11c50..659564e5fc0f 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile @@ -9,4 +9,4 @@ obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o -obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o mce_intel.o +obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c deleted file mode 100644 index 475478b20884..000000000000 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Common code for Intel machine checks - */ -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -void intel_init_thermal(struct cpuinfo_x86 *c) -{ - unsigned int cpu = smp_processor_id(); - int tm2 = 0; - u32 l, h; - - /* Thermal monitoring depends on ACPI and clock modulation*/ - if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) - return; - - /* - * First check if its enabled already, in which case there might - * be some SMM goo which handles it, so we can't even put a handler - * since it might be delivered via SMI already: - */ - rdmsr(MSR_IA32_MISC_ENABLE, l, h); - h = apic_read(APIC_LVTTHMR); - if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { - printk(KERN_DEBUG - "CPU%d: Thermal monitoring handled by SMI\n", cpu); - return; - } - - if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) - tm2 = 1; - - /* Check whether a vector already exists */ - if (h & APIC_VECTOR_MASK) { - printk(KERN_DEBUG - "CPU%d: Thermal LVT vector (%#x) already installed\n", - cpu, (h & APIC_VECTOR_MASK)); - return; - } - - /* We'll mask the thermal vector in the lapic till we're ready: */ - h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; - apic_write(APIC_LVTTHMR, h); - - rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); - wrmsr(MSR_IA32_THERM_INTERRUPT, - l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); - - intel_set_thermal_handler(); - - rdmsr(MSR_IA32_MISC_ENABLE, l, h); - wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); - - /* Unmask the thermal vector: */ - l = apic_read(APIC_LVTTHMR); - apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); - - printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", - cpu, tm2 ? "TM2" : "TM1"); - - /* enable thermal throttle processing */ - atomic_set(&therm_throt_en, 1); -} diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index b3792b196856..7a508aaafcea 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -16,13 +16,21 @@ #include #include #include +#include #include #include +#include +#include +#include #include #include +#include +#include +#include #include #include +#include /* How long to wait between reporting thermal events */ #define CHECK_INTERVAL (300 * HZ) @@ -227,3 +235,61 @@ void intel_set_thermal_handler(void) { smp_thermal_vector = intel_thermal_interrupt; } + +void intel_init_thermal(struct cpuinfo_x86 *c) +{ + unsigned int cpu = smp_processor_id(); + int tm2 = 0; + u32 l, h; + + /* Thermal monitoring depends on ACPI and clock modulation*/ + if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) + return; + + /* + * First check if its enabled already, in which case there might + * be some SMM goo which handles it, so we can't even put a handler + * since it might be delivered via SMI already: + */ + rdmsr(MSR_IA32_MISC_ENABLE, l, h); + h = apic_read(APIC_LVTTHMR); + if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { + printk(KERN_DEBUG + "CPU%d: Thermal monitoring handled by SMI\n", cpu); + return; + } + + if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) + tm2 = 1; + + /* Check whether a vector already exists */ + if (h & APIC_VECTOR_MASK) { + printk(KERN_DEBUG + "CPU%d: Thermal LVT vector (%#x) already installed\n", + cpu, (h & APIC_VECTOR_MASK)); + return; + } + + /* We'll mask the thermal vector in the lapic till we're ready: */ + h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; + apic_write(APIC_LVTTHMR, h); + + rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); + wrmsr(MSR_IA32_THERM_INTERRUPT, + l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); + + intel_set_thermal_handler(); + + rdmsr(MSR_IA32_MISC_ENABLE, l, h); + wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); + + /* Unmask the thermal vector: */ + l = apic_read(APIC_LVTTHMR); + apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); + + printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", + cpu, tm2 ? "TM2" : "TM1"); + + /* enable thermal throttle processing */ + atomic_set(&therm_throt_en, 1); +} From 8363fc82d36c0886292e33925391dca93f03bd50 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 15 Jun 2009 17:26:36 +0900 Subject: [PATCH 14/21] x86, mce: remove intel_set_thermal_handler() and make intel_thermal_interrupt() static. Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 1 - arch/x86/kernel/cpu/mcheck/therm_throt.c | 9 ++------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 3bc827c0f409..365a594b41bc 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -199,7 +199,6 @@ extern void (*mce_threshold_vector)(void); * Thermal handler */ -void intel_set_thermal_handler(void); void intel_init_thermal(struct cpuinfo_x86 *c); #ifdef CONFIG_X86_NEW_MCE diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 7a508aaafcea..7c7944cc515d 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -202,7 +202,7 @@ device_initcall(thermal_throttle_init_device); #endif /* CONFIG_SYSFS */ /* Thermal transition interrupt handler */ -void intel_thermal_interrupt(void) +static void intel_thermal_interrupt(void) { __u64 msr_val; @@ -231,11 +231,6 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) ack_APIC_irq(); } -void intel_set_thermal_handler(void) -{ - smp_thermal_vector = intel_thermal_interrupt; -} - void intel_init_thermal(struct cpuinfo_x86 *c) { unsigned int cpu = smp_processor_id(); @@ -278,7 +273,7 @@ void intel_init_thermal(struct cpuinfo_x86 *c) wrmsr(MSR_IA32_THERM_INTERRUPT, l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); - intel_set_thermal_handler(); + smp_thermal_vector = intel_thermal_interrupt; rdmsr(MSR_IA32_MISC_ENABLE, l, h); wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); From 1149e7264528723b8c3b075a90386ceb2e07070a Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 15 Jun 2009 17:27:13 +0900 Subject: [PATCH 15/21] x86, mce: remove therm_throt.h Now all symbols in the header are static. Remove the header. Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/therm_throt.h | 9 --------- arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 1 - arch/x86/kernel/cpu/mcheck/p4.c | 1 - arch/x86/kernel/cpu/mcheck/therm_throt.c | 5 ++--- 4 files changed, 2 insertions(+), 14 deletions(-) delete mode 100644 arch/x86/include/asm/therm_throt.h diff --git a/arch/x86/include/asm/therm_throt.h b/arch/x86/include/asm/therm_throt.h deleted file mode 100644 index c62349ee7860..000000000000 --- a/arch/x86/include/asm/therm_throt.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _ASM_X86_THERM_THROT_H -#define _ASM_X86_THERM_THROT_H - -#include - -extern atomic_t therm_throt_en; -int therm_throt_process(int curr); - -#endif /* _ASM_X86_THERM_THROT_H */ diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index 3b7a0572e2fe..663a88e8b3c7 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -11,7 +11,6 @@ #include #include #include -#include /* * Support for Intel Correct Machine Check Interrupts. This allows diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index 76c5f0305f9f..4482aea9aa2e 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -6,7 +6,6 @@ #include #include -#include #include #include #include diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 7c7944cc515d..bff8dd191dd5 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -24,7 +24,6 @@ #include #include -#include #include #include #include @@ -38,7 +37,7 @@ static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); -atomic_t therm_throt_en = ATOMIC_INIT(0); +static atomic_t therm_throt_en = ATOMIC_INIT(0); #ifdef CONFIG_SYSFS #define define_therm_throt_sysdev_one_ro(_name) \ @@ -93,7 +92,7 @@ static struct attribute_group thermal_throttle_attr_group = { * 1 : Event should be logged further, and a message has been * printed to the syslog. */ -int therm_throt_process(int curr) +static int therm_throt_process(int curr) { unsigned int cpu = smp_processor_id(); __u64 tmp_jiffs = get_jiffies_64(); From 58995d2d58e8e555bc92582abe7554921deea3aa Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 15 Jun 2009 17:27:47 +0900 Subject: [PATCH 16/21] x86, mce: mce.h cleanup Reorder definitions. - static inline dummy mcheck_init() for !CONFIG_X86_MCE - gather defs for exception, threshold handler Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 365a594b41bc..5cdd8d100ec9 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -109,6 +109,12 @@ struct mce_log { extern int mce_disabled; extern int mce_p5_enabled; +#ifdef CONFIG_X86_MCE +void mcheck_init(struct cpuinfo_x86 *c); +#else +static inline void mcheck_init(struct cpuinfo_x86 *c) {} +#endif + #ifdef CONFIG_X86_OLD_MCE extern int nr_mce_banks; void amd_mcheck_init(struct cpuinfo_x86 *c); @@ -126,13 +132,9 @@ static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} static inline void enable_p5_mce(void) {} #endif -/* Call the installed machine check handler for this CPU setup. */ -extern void (*machine_check_vector)(struct pt_regs *, long error_code); - void mce_setup(struct mce *m); void mce_log(struct mce *m); DECLARE_PER_CPU(struct sys_device, mce_dev); -extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); /* * To support more than 128 would need to escape the predefined @@ -169,8 +171,6 @@ DECLARE_PER_CPU(unsigned, mce_poll_count); extern atomic_t mce_entry; -void do_machine_check(struct pt_regs *, long); - typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS); DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); @@ -187,13 +187,20 @@ void mce_notify_process(void); DECLARE_PER_CPU(struct mce, injectm); extern struct file_operations mce_chrdev_ops; -#ifdef CONFIG_X86_MCE -void mcheck_init(struct cpuinfo_x86 *c); -#else -#define mcheck_init(c) do { } while (0) -#endif +/* + * Exception handler + */ + +/* Call the installed machine check handler for this CPU setup. */ +extern void (*machine_check_vector)(struct pt_regs *, long error_code); +void do_machine_check(struct pt_regs *, long); + +/* + * Threshold handler + */ extern void (*mce_threshold_vector)(void); +extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); /* * Thermal handler From 1af0815f9639752409a9c15ba6a3dc0f322fd114 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 15 Jun 2009 17:28:38 +0900 Subject: [PATCH 17/21] x86, mce: rename _64.c files which are no longer 64-bit-specific Rename files that are no longer 64bit specific: mce_amd_64.c => mce_amd.c mce_intel_64.c => mce_intel.c Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/Makefile | 4 ++-- arch/x86/kernel/cpu/mcheck/{mce_amd_64.c => mce_amd.c} | 0 arch/x86/kernel/cpu/mcheck/{mce_intel_64.c => mce_intel.c} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename arch/x86/kernel/cpu/mcheck/{mce_amd_64.c => mce_amd.c} (100%) rename arch/x86/kernel/cpu/mcheck/{mce_intel_64.c => mce_intel.c} (100%) diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index 659564e5fc0f..188a1ca5ad2b 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile @@ -3,8 +3,8 @@ obj-y = mce.o obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o -obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o -obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o +obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o +obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c similarity index 100% rename from arch/x86/kernel/cpu/mcheck/mce_amd_64.c rename to arch/x86/kernel/cpu/mcheck/mce_amd.c diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c similarity index 100% rename from arch/x86/kernel/cpu/mcheck/mce_intel_64.c rename to arch/x86/kernel/cpu/mcheck/mce_intel.c From 1bf7b31efa0c322d93cb3f772cd9bc743e8bb42d Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 17 Jun 2009 08:31:15 -0700 Subject: [PATCH 18/21] x86, mce: mce_intel.c needs mce_intel.c uses apic_write() and lapic_get_maxlvt(), and so it needs . Signed-off-by: H. Peter Anvin Cc: Andi Kleen Cc: Hidetoshi Seto --- arch/x86/kernel/cpu/mcheck/mce_intel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 663a88e8b3c7..e1acec0f7a32 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include From 74b602c7147212a7495879ec23fe6c2d3b470e06 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 17 Jun 2009 14:43:32 -0700 Subject: [PATCH 19/21] x86: fix duplicated sysfs attribute The sysfs attribute cmci_disabled was accidentall turned into a duplicate of ignore_ce, breaking all other attributes. Signed-off-by: Yinghai Lu Acked-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 2a560cefb675..5aac9e4dd136 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1777,7 +1777,7 @@ static struct sysdev_ext_attribute attr_ignore_ce = { }; static struct sysdev_ext_attribute attr_cmci_disabled = { - _SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_cmci_disabled), + _SYSDEV_ATTR(cmci_disabled, 0644, sysdev_show_int, set_cmci_disabled), &mce_cmci_disabled }; From e92fae064ae42b2a4a77646f7655bca4c87bb1eb Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 17 Jun 2009 16:21:33 -0700 Subject: [PATCH 20/21] x86: use zalloc_cpumask_var for mce_dev_initialized We need a cleared cpu_mask to record if mce is initialized, especially when MAXSMP is used. used zalloc_... instead Signed-off-by: Yinghai Lu Reviewed-by: Hidetoshi Seto Cc: stable@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 5aac9e4dd136..c2fb70d0286f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1969,7 +1969,7 @@ static __init int mce_init_device(void) if (!mce_available(&boot_cpu_data)) return -EIO; - alloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); + zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); err = mce_init_banks(); if (err) From b1f49f9582f9be6de5055cfa97eabf6246f2eaf7 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 18 Jun 2009 14:53:24 +0900 Subject: [PATCH 21/21] x86, mce: fix error path in mce_create_device() Don't skip removing mce_attrs in route from error2. Signed-off-by: Hidetoshi Seto Cc: Andi Kleen Cc: Huang Ying Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index c2fb70d0286f..284d1de968bc 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1798,7 +1798,7 @@ static cpumask_var_t mce_dev_initialized; static __cpuinit int mce_create_device(unsigned int cpu) { int err; - int i; + int i, j; if (!mce_available(&boot_cpu_data)) return -EIO; @@ -1816,9 +1816,9 @@ static __cpuinit int mce_create_device(unsigned int cpu) if (err) goto error; } - for (i = 0; i < banks; i++) { + for (j = 0; j < banks; j++) { err = sysdev_create_file(&per_cpu(mce_dev, cpu), - &bank_attrs[i]); + &bank_attrs[j]); if (err) goto error2; } @@ -1826,8 +1826,8 @@ static __cpuinit int mce_create_device(unsigned int cpu) return 0; error2: - while (--i >= 0) - sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]); + while (--j >= 0) + sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]); error: while (--i >= 0) sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);