Merge tag 'ras_core_for_v5.16_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS updates from Borislav Petkov:

 - Get rid of a bunch of function pointers used in MCA land in favor of
   normal functions. This is in preparation for making the MCA code
   noinstr-aware (a standalone sketch of the resulting accessor pattern
   follows this list)

 - When the kernel copies data from user addresses and encounters a
   machine check, a SIGBUS is sent to that process. Change this action
   to either an -EFAULT returned to the user or a short write, making
   the recovery action a lot more user-friendly (a userspace-side sketch
   of the new behavior appears at the end of this page)
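
The first item converges on a single accessor, mca_msr_reg(), whose kernel version appears in the core.c hunk further down this page. As a minimal standalone sketch of the pattern only (plain C; the register numbers are illustrative stand-ins, not the authoritative MSR_IA32_MCx_* / MSR_AMD64_SMCA_MCx_* values):

    #include <stdint.h>
    #include <stdio.h>

    enum mca_msr { MCA_CTL, MCA_STATUS, MCA_ADDR, MCA_MISC };

    static int smca_enabled;   /* stand-in for mce_flags.smca */

    /* One plain function replaces the old msr_ops struct of function pointers. */
    static uint32_t mca_msr_reg(int bank, enum mca_msr reg)
    {
        /* Illustrative bases/strides, not authoritative MSR numbers. */
        static const uint32_t legacy[] = { 0x400, 0x401, 0x402, 0x403 };
        static const uint32_t smca[]   = { 0xc0002000, 0xc0002001, 0xc0002002, 0xc0002003 };

        if (smca_enabled)
            return smca[reg] + 0x10 * bank;

        return legacy[reg] + 4 * bank;
    }

    int main(void)
    {
        /* Callers now write mca_msr_reg(bank, MCA_STATUS) instead of msr_ops.status(bank). */
        printf("bank 3 STATUS MSR: %#x\n", (unsigned)mca_msr_reg(3, MCA_STATUS));
        return 0;
    }

Why a plain call helps the noinstr work is spelled out by a comment that the core.c hunk below deletes: objtool cannot see through an indirect call to verify that its targets are noinstr, so the old machine_check_vector() call needed an instrumentation_begin()/instrumentation_end() wrapper that the direct do_machine_check() call does not.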

* tag 'ras_core_for_v5.16_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Sort mca_config members to get rid of unnecessary padding
  x86/mce: Get rid of the ->quirk_no_way_out() indirect call
  x86/mce: Get rid of msr_ops
  x86/mce: Get rid of machine_check_vector
  x86/mce: Get rid of the mce_severity function pointer
  x86/mce: Drop copyin special case for #MC
  x86/mce: Change to not send SIGBUS error during copy from user
Linus Torvalds 2021-11-01 15:12:04 -07:00
commit 158405e888
8 changed files with 161 additions and 208 deletions

arch/x86/include/asm/mce.h

@@ -205,28 +205,16 @@ struct cper_ia_proc_ctx;
 int mcheck_init(void);
 void mcheck_cpu_init(struct cpuinfo_x86 *c);
 void mcheck_cpu_clear(struct cpuinfo_x86 *c);
-void mcheck_vendor_init_severity(void);
 int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
                                u64 lapic_id);
 #else
 static inline int mcheck_init(void) { return 0; }
 static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
 static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {}
-static inline void mcheck_vendor_init_severity(void) {}
 static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
                                              u64 lapic_id) { return -EINVAL; }
 #endif
 
-#ifdef CONFIG_X86_ANCIENT_MCE
-void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
-void winchip_mcheck_init(struct cpuinfo_x86 *c);
-static inline void enable_p5_mce(void) { mce_p5_enabled = 1; }
-#else
-static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
-static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
-static inline void enable_p5_mce(void) {}
-#endif
-
 void mce_setup(struct mce *m);
 void mce_log(struct mce *m);
 DECLARE_PER_CPU(struct device *, mce_device);

arch/x86/kernel/cpu/mce/amd.c

@@ -526,7 +526,7 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high,
     /* Fall back to method we used for older processors: */
     switch (block) {
     case 0:
-        addr = msr_ops.misc(bank);
+        addr = mca_msr_reg(bank, MCA_MISC);
         break;
     case 1:
         offset = ((low & MASK_BLKPTR_LO) >> 21);
@@ -978,8 +978,8 @@ static void log_error_deferred(unsigned int bank)
 {
     bool defrd;
 
-    defrd = _log_error_bank(bank, msr_ops.status(bank),
-                            msr_ops.addr(bank), 0);
+    defrd = _log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS),
+                            mca_msr_reg(bank, MCA_ADDR), 0);
 
     if (!mce_flags.smca)
         return;
@@ -1009,7 +1009,7 @@ static void amd_deferred_error_interrupt(void)
 static void log_error_thresholding(unsigned int bank, u64 misc)
 {
-    _log_error_bank(bank, msr_ops.status(bank), msr_ops.addr(bank), misc);
+    _log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS), mca_msr_reg(bank, MCA_ADDR), misc);
 }
 
 static void log_and_reset_block(struct threshold_block *block)
@@ -1397,7 +1397,7 @@ static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu,
         }
     }
 
-    err = allocate_threshold_blocks(cpu, b, bank, 0, msr_ops.misc(bank));
+    err = allocate_threshold_blocks(cpu, b, bank, 0, mca_msr_reg(bank, MCA_MISC));
     if (err)
         goto out_kobj;

arch/x86/kernel/cpu/mce/core.c

@@ -121,8 +121,6 @@ mce_banks_t mce_banks_ce_disabled;
 static struct work_struct mce_work;
 static struct irq_work mce_irq_work;
 
-static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
-
 /*
  * CPU/chipset specific EDAC code can register a notifier call here to print
  * MCE errors in a human-readable form.
@@ -176,53 +174,27 @@ void mce_unregister_decode_chain(struct notifier_block *nb)
 }
 EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
 
-static inline u32 ctl_reg(int bank)
-{
-    return MSR_IA32_MCx_CTL(bank);
-}
-
-static inline u32 status_reg(int bank)
-{
-    return MSR_IA32_MCx_STATUS(bank);
-}
-
-static inline u32 addr_reg(int bank)
-{
-    return MSR_IA32_MCx_ADDR(bank);
-}
-
-static inline u32 misc_reg(int bank)
-{
-    return MSR_IA32_MCx_MISC(bank);
-}
-
-static inline u32 smca_ctl_reg(int bank)
-{
-    return MSR_AMD64_SMCA_MCx_CTL(bank);
-}
-
-static inline u32 smca_status_reg(int bank)
-{
-    return MSR_AMD64_SMCA_MCx_STATUS(bank);
-}
-
-static inline u32 smca_addr_reg(int bank)
-{
-    return MSR_AMD64_SMCA_MCx_ADDR(bank);
-}
-
-static inline u32 smca_misc_reg(int bank)
-{
-    return MSR_AMD64_SMCA_MCx_MISC(bank);
-}
-
-struct mca_msr_regs msr_ops = {
-    .ctl    = ctl_reg,
-    .status = status_reg,
-    .addr   = addr_reg,
-    .misc   = misc_reg
-};
+u32 mca_msr_reg(int bank, enum mca_msr reg)
+{
+    if (mce_flags.smca) {
+        switch (reg) {
+        case MCA_CTL:    return MSR_AMD64_SMCA_MCx_CTL(bank);
+        case MCA_ADDR:   return MSR_AMD64_SMCA_MCx_ADDR(bank);
+        case MCA_MISC:   return MSR_AMD64_SMCA_MCx_MISC(bank);
+        case MCA_STATUS: return MSR_AMD64_SMCA_MCx_STATUS(bank);
+        }
+    }
+
+    switch (reg) {
+    case MCA_CTL:    return MSR_IA32_MCx_CTL(bank);
+    case MCA_ADDR:   return MSR_IA32_MCx_ADDR(bank);
+    case MCA_MISC:   return MSR_IA32_MCx_MISC(bank);
+    case MCA_STATUS: return MSR_IA32_MCx_STATUS(bank);
+    }
+
+    return 0;
+}
 
 static void __print_mce(struct mce *m)
 {
     pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n",
@@ -362,11 +334,11 @@ static int msr_to_offset(u32 msr)
     if (msr == mca_cfg.rip_msr)
         return offsetof(struct mce, ip);
-    if (msr == msr_ops.status(bank))
+    if (msr == mca_msr_reg(bank, MCA_STATUS))
         return offsetof(struct mce, status);
-    if (msr == msr_ops.addr(bank))
+    if (msr == mca_msr_reg(bank, MCA_ADDR))
         return offsetof(struct mce, addr);
-    if (msr == msr_ops.misc(bank))
+    if (msr == mca_msr_reg(bank, MCA_MISC))
         return offsetof(struct mce, misc);
     if (msr == MSR_IA32_MCG_STATUS)
         return offsetof(struct mce, mcgstatus);
@@ -667,10 +639,10 @@ static struct notifier_block mce_default_nb = {
 static void mce_read_aux(struct mce *m, int i)
 {
     if (m->status & MCI_STATUS_MISCV)
-        m->misc = mce_rdmsrl(msr_ops.misc(i));
+        m->misc = mce_rdmsrl(mca_msr_reg(i, MCA_MISC));
 
     if (m->status & MCI_STATUS_ADDRV) {
-        m->addr = mce_rdmsrl(msr_ops.addr(i));
+        m->addr = mce_rdmsrl(mca_msr_reg(i, MCA_ADDR));
 
         /*
          * Mask the reported address by the reported granularity.
@@ -740,7 +712,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
         m.bank = i;
         barrier();
-        m.status = mce_rdmsrl(msr_ops.status(i));
+        m.status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
 
         /* If this entry is not valid, ignore it */
         if (!(m.status & MCI_STATUS_VAL))
@@ -808,7 +780,7 @@ clear_it:
         /*
         * Clear state for this bank.
         */
-        mce_wrmsrl(msr_ops.status(i), 0);
+        mce_wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
     }
 
     /*
@@ -822,6 +794,34 @@ clear_it:
 }
 EXPORT_SYMBOL_GPL(machine_check_poll);
 
+/*
+ * During IFU recovery Sandy Bridge -EP4S processors set the RIPV and
+ * EIPV bits in MCG_STATUS to zero on the affected logical processor (SDM
+ * Vol 3B Table 15-20). But this confuses both the code that determines
+ * whether the machine check occurred in kernel or user mode, and also
+ * the severity assessment code. Pretend that EIPV was set, and take the
+ * ip/cs values from the pt_regs that mce_gather_info() ignored earlier.
+ */
+static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
+{
+    if (bank != 0)
+        return;
+    if ((m->mcgstatus & (MCG_STATUS_EIPV|MCG_STATUS_RIPV)) != 0)
+        return;
+    if ((m->status & (MCI_STATUS_OVER|MCI_STATUS_UC|
+              MCI_STATUS_EN|MCI_STATUS_MISCV|MCI_STATUS_ADDRV|
+              MCI_STATUS_PCC|MCI_STATUS_S|MCI_STATUS_AR|
+              MCACOD)) !=
+             (MCI_STATUS_UC|MCI_STATUS_EN|
+              MCI_STATUS_MISCV|MCI_STATUS_ADDRV|MCI_STATUS_S|
+              MCI_STATUS_AR|MCACOD_INSTR))
+        return;
+
+    m->mcgstatus |= MCG_STATUS_EIPV;
+    m->ip = regs->ip;
+    m->cs = regs->cs;
+}
+
 /*
  * Do a quick check if any of the events requires a panic.
  * This decides if we keep the events around or clear them.
@@ -833,13 +833,13 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
     int i;
 
     for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
-        m->status = mce_rdmsrl(msr_ops.status(i));
+        m->status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
         if (!(m->status & MCI_STATUS_VAL))
             continue;
 
         __set_bit(i, validp);
-        if (quirk_no_way_out)
-            quirk_no_way_out(i, m, regs);
+        if (mce_flags.snb_ifu_quirk)
+            quirk_sandybridge_ifu(i, m, regs);
 
         m->bank = i;
         if (mce_severity(m, regs, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
@@ -1126,7 +1126,7 @@ static void mce_clear_state(unsigned long *toclear)
     for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
         if (test_bit(i, toclear))
-            mce_wrmsrl(msr_ops.status(i), 0);
+            mce_wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
     }
 }
@@ -1185,7 +1185,7 @@ static void __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *fin
         m->addr = 0;
         m->bank = i;
 
-        m->status = mce_rdmsrl(msr_ops.status(i));
+        m->status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
         if (!(m->status & MCI_STATUS_VAL))
             continue;
@@ -1254,7 +1254,7 @@ static void kill_me_maybe(struct callback_head *cb)
         flags |= MF_MUST_KILL;
 
     ret = memory_failure(p->mce_addr >> PAGE_SHIFT, flags);
-    if (!ret && !(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) {
+    if (!ret) {
         set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
         sync_core();
         return;
@@ -1268,15 +1268,21 @@ static void kill_me_maybe(struct callback_head *cb)
     if (ret == -EHWPOISON)
         return;
 
-    if (p->mce_vaddr != (void __user *)-1l) {
-        force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT);
-    } else {
-        pr_err("Memory error not recovered");
-        kill_me_now(cb);
-    }
+    pr_err("Memory error not recovered");
+    kill_me_now(cb);
+}
+
+static void kill_me_never(struct callback_head *cb)
+{
+    struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
+
+    p->mce_count = 0;
+    pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr);
+    if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0))
+        set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
 }
 
-static void queue_task_work(struct mce *m, char *msg, int kill_current_task)
+static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callback_head *))
 {
     int count = ++current->mce_count;
@@ -1286,11 +1292,7 @@ static void queue_task_work(struct mce *m, char *msg, int kill_current_task)
         current->mce_kflags = m->kflags;
         current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV);
         current->mce_whole_page = whole_page(m);
-
-        if (kill_current_task)
-            current->mce_kill_me.func = kill_me_now;
-        else
-            current->mce_kill_me.func = kill_me_maybe;
+        current->mce_kill_me.func = func;
     }
 
     /* Ten is likely overkill. Don't expect more than two faults before task_work() */
@@ -1308,6 +1310,15 @@ static void queue_task_work(struct mce *m, char *msg, int kill_current_task)
     task_work_add(current, &current->mce_kill_me, TWA_RESUME);
 }
 
+/* Handle unconfigured int18 (should never happen) */
+static noinstr void unexpected_machine_check(struct pt_regs *regs)
+{
+    instrumentation_begin();
+    pr_err("CPU#%d: Unexpected int18 (Machine Check)\n",
+           smp_processor_id());
+    instrumentation_end();
+}
+
 /*
  * The actual machine check handler. This only handles real
  * exceptions when something got corrupted coming in through int 18.
@@ -1328,36 +1339,43 @@ static void queue_task_work(struct mce *m, char *msg, int kill_current_task)
  */
 noinstr void do_machine_check(struct pt_regs *regs)
 {
+    int worst = 0, order, no_way_out, kill_current_task, lmce;
     DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
     DECLARE_BITMAP(toclear, MAX_NR_BANKS);
     struct mca_config *cfg = &mca_cfg;
     struct mce m, *final;
     char *msg = NULL;
-    int worst = 0;
+
+    if (unlikely(mce_flags.p5))
+        return pentium_machine_check(regs);
+    else if (unlikely(mce_flags.winchip))
+        return winchip_machine_check(regs);
+    else if (unlikely(!mca_cfg.initialized))
+        return unexpected_machine_check(regs);
 
     /*
      * Establish sequential order between the CPUs entering the machine
     * check handler.
     */
-    int order = -1;
+    order = -1;
 
     /*
     * If no_way_out gets set, there is no safe way to recover from this
     * MCE. If mca_cfg.tolerant is cranked up, we'll try anyway.
     */
-    int no_way_out = 0;
+    no_way_out = 0;
 
     /*
    * If kill_current_task is not set, there might be a way to recover from this
    * error.
    */
-    int kill_current_task = 0;
+    kill_current_task = 0;
 
     /*
     * MCEs are always local on AMD. Same is determined by MCG_STATUS_LMCES
    * on Intel.
    */
-    int lmce = 1;
+    lmce = 1;
 
     this_cpu_inc(mce_exception_count);
@@ -1441,7 +1459,10 @@ noinstr void do_machine_check(struct pt_regs *regs)
         /* If this triggers there is no way to recover. Die hard. */
         BUG_ON(!on_thread_stack() || !user_mode(regs));
 
-        queue_task_work(&m, msg, kill_current_task);
+        if (kill_current_task)
+            queue_task_work(&m, msg, kill_me_now);
+        else
+            queue_task_work(&m, msg, kill_me_maybe);
 
     } else {
         /*
@@ -1459,7 +1480,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
         }
 
         if (m.kflags & MCE_IN_KERNEL_COPYIN)
-            queue_task_work(&m, msg, kill_current_task);
+            queue_task_work(&m, msg, kill_me_never);
     }
 out:
     mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
@@ -1669,8 +1690,8 @@ static void __mcheck_cpu_init_clear_banks(void)
         if (!b->init)
             continue;
-        wrmsrl(msr_ops.ctl(i), b->ctl);
-        wrmsrl(msr_ops.status(i), 0);
+        wrmsrl(mca_msr_reg(i, MCA_CTL), b->ctl);
+        wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
     }
 }
@@ -1696,39 +1717,11 @@ static void __mcheck_cpu_check_banks(void)
         if (!b->init)
             continue;
 
-        rdmsrl(msr_ops.ctl(i), msrval);
+        rdmsrl(mca_msr_reg(i, MCA_CTL), msrval);
         b->init = !!msrval;
     }
 }
 
-/*
- * During IFU recovery Sandy Bridge -EP4S processors set the RIPV and
- * EIPV bits in MCG_STATUS to zero on the affected logical processor (SDM
- * Vol 3B Table 15-20). But this confuses both the code that determines
- * whether the machine check occurred in kernel or user mode, and also
- * the severity assessment code. Pretend that EIPV was set, and take the
- * ip/cs values from the pt_regs that mce_gather_info() ignored earlier.
- */
-static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
-{
-    if (bank != 0)
-        return;
-    if ((m->mcgstatus & (MCG_STATUS_EIPV|MCG_STATUS_RIPV)) != 0)
-        return;
-    if ((m->status & (MCI_STATUS_OVER|MCI_STATUS_UC|
-              MCI_STATUS_EN|MCI_STATUS_MISCV|MCI_STATUS_ADDRV|
-              MCI_STATUS_PCC|MCI_STATUS_S|MCI_STATUS_AR|
-              MCACOD)) !=
-             (MCI_STATUS_UC|MCI_STATUS_EN|
-              MCI_STATUS_MISCV|MCI_STATUS_ADDRV|MCI_STATUS_S|
-              MCI_STATUS_AR|MCACOD_INSTR))
-        return;
-
-    m->mcgstatus |= MCG_STATUS_EIPV;
-    m->ip = regs->ip;
-    m->cs = regs->cs;
-}
-
 /* Add per CPU specific workarounds here */
 static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 {
@@ -1802,7 +1795,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
             cfg->bootlog = 0;
 
         if (c->x86 == 6 && c->x86_model == 45)
-            quirk_no_way_out = quirk_sandybridge_ifu;
+            mce_flags.snb_ifu_quirk = 1;
     }
 
     if (c->x86_vendor == X86_VENDOR_ZHAOXIN) {
@@ -1832,9 +1825,11 @@ static int __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
     switch (c->x86_vendor) {
     case X86_VENDOR_INTEL:
         intel_p5_mcheck_init(c);
+        mce_flags.p5 = 1;
         return 1;
     case X86_VENDOR_CENTAUR:
         winchip_mcheck_init(c);
+        mce_flags.winchip = 1;
         return 1;
     default:
         return 0;
@@ -1853,13 +1848,6 @@ static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c)
         mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR);
         mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA);
         mce_flags.amd_threshold = 1;
-
-        if (mce_flags.smca) {
-            msr_ops.ctl    = smca_ctl_reg;
-            msr_ops.status = smca_status_reg;
-            msr_ops.addr   = smca_addr_reg;
-            msr_ops.misc   = smca_misc_reg;
-        }
     }
 }
@@ -1989,18 +1977,6 @@ bool filter_mce(struct mce *m)
     return false;
 }
 
-/* Handle unconfigured int18 (should never happen) */
-static noinstr void unexpected_machine_check(struct pt_regs *regs)
-{
-    instrumentation_begin();
-    pr_err("CPU#%d: Unexpected int18 (Machine Check)\n",
-           smp_processor_id());
-    instrumentation_end();
-}
-
-/* Call the installed machine check handler for this CPU setup. */
-void (*machine_check_vector)(struct pt_regs *) = unexpected_machine_check;
-
 static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
 {
     irqentry_state_t irq_state;
@@ -2011,31 +1987,22 @@ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
     * Only required when from kernel mode. See
     * mce_check_crashing_cpu() for details.
     */
-    if (machine_check_vector == do_machine_check &&
-        mce_check_crashing_cpu())
+    if (mca_cfg.initialized && mce_check_crashing_cpu())
        return;
 
     irq_state = irqentry_nmi_enter(regs);
-    /*
-     * The call targets are marked noinstr, but objtool can't figure
-     * that out because it's an indirect call. Annotate it.
-     */
-    instrumentation_begin();
 
-    machine_check_vector(regs);
+    do_machine_check(regs);
 
-    instrumentation_end();
     irqentry_nmi_exit(regs, irq_state);
 }
 
 static __always_inline void exc_machine_check_user(struct pt_regs *regs)
 {
     irqentry_enter_from_user_mode(regs);
-    instrumentation_begin();
 
-    machine_check_vector(regs);
+    do_machine_check(regs);
 
-    instrumentation_end();
     irqentry_exit_to_user_mode(regs);
 }
@@ -2102,7 +2069,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
         return;
     }
 
-    machine_check_vector = do_machine_check;
+    mca_cfg.initialized = 1;
     __mcheck_cpu_init_early(c);
     __mcheck_cpu_init_generic();
@@ -2210,7 +2177,6 @@ int __init mcheck_init(void)
     mce_register_decode_chain(&early_nb);
     mce_register_decode_chain(&mce_uc_nb);
     mce_register_decode_chain(&mce_default_nb);
-    mcheck_vendor_init_severity();
 
     INIT_WORK(&mce_work, mce_gen_pool_process);
     init_irq_work(&mce_irq_work, mce_irq_work_cb);
@@ -2235,7 +2201,7 @@ static void mce_disable_error_reporting(void)
        struct mce_bank *b = &mce_banks[i];
 
        if (b->init)
-           wrmsrl(msr_ops.ctl(i), 0);
+           wrmsrl(mca_msr_reg(i, MCA_CTL), 0);
    }
    return;
 }
@@ -2587,7 +2553,7 @@ static void mce_reenable_cpu(void)
        struct mce_bank *b = &mce_banks[i];
 
        if (b->init)
-           wrmsrl(msr_ops.ctl(i), b->ctl);
+           wrmsrl(mca_msr_reg(i, MCA_CTL), b->ctl);
    }
 }
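
The core.c hunks above also rework how the handler defers its user-visible action: queue_task_work() now receives the completion callback from its caller instead of a kill_current_task flag, which is what makes room for the non-fatal kill_me_never() path used when poison is hit during a kernel copy from user space. A standalone sketch of just that shape (ordinary C, not kernel code; the puts() strings merely summarize what the real callbacks do):

    #include <stdio.h>

    struct callback_head { void (*func)(struct callback_head *); };

    static void kill_me_now(struct callback_head *cb)   { puts("send SIGBUS immediately"); }
    static void kill_me_maybe(struct callback_head *cb) { puts("try memory_failure(), signal only if unrecovered"); }
    static void kill_me_never(struct callback_head *cb) { puts("handle the poisoned page, no signal"); }

    /* Old shape: queue_task_work(m, msg, kill_current_task) picked the callback itself. */
    /* New shape: the decision moves to the call sites in do_machine_check(). */
    static void queue_task_work(struct callback_head *work, void (*func)(struct callback_head *))
    {
        work->func = func;   /* stand-in for task_work_add(current, work, TWA_RESUME) */
    }

    int main(void)
    {
        struct callback_head work;

        queue_task_work(&work, kill_me_never);  /* e.g. the MCE_IN_KERNEL_COPYIN case */
        work.func(&work);                       /* runs when the task returns to user mode */
        return 0;
    }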

arch/x86/kernel/cpu/mce/internal.h

@@ -8,9 +8,6 @@
 #include <linux/device.h>
 #include <asm/mce.h>
 
-/* Pointer to the installed machine check handler for this CPU setup. */
-extern void (*machine_check_vector)(struct pt_regs *);
-
 enum severity_level {
     MCE_NO_SEVERITY,
     MCE_DEFERRED_SEVERITY,
@@ -38,8 +35,7 @@ int mce_gen_pool_add(struct mce *mce);
 int mce_gen_pool_init(void);
 struct llist_node *mce_gen_pool_prepare_records(void);
 
-extern int (*mce_severity)(struct mce *a, struct pt_regs *regs,
-                           int tolerant, char **msg, bool is_excp);
+int mce_severity(struct mce *a, struct pt_regs *regs, int tolerant, char **msg, bool is_excp);
 struct dentry *mce_get_debugfs_dir(void);
 
 extern mce_banks_t mce_banks_ce_disabled;
@@ -117,23 +113,25 @@ static inline void mce_unregister_injector_chain(struct notifier_block *nb) { }
 #endif
 
 struct mca_config {
-    bool dont_log_ce;
-    bool cmci_disabled;
-    bool ignore_ce;
-    bool print_all;
-
     __u64 lmce_disabled       : 1,
           disabled            : 1,
           ser                 : 1,
           recovery            : 1,
           bios_cmci_threshold : 1,
-          __reserved          : 59;
+          /* Proper #MC exception handler is set */
+          initialized         : 1,
+          __reserved          : 58;
+
+    bool dont_log_ce;
+    bool cmci_disabled;
+    bool ignore_ce;
+    bool print_all;
 
-    s8 bootlog;
     int tolerant;
     int monarch_timeout;
     int panic_timeout;
     u32 rip_msr;
+    s8 bootlog;
 };
 
 extern struct mca_config mca_cfg;
@@ -163,19 +161,28 @@ struct mce_vendor_flags {
     /* AMD-style error thresholding banks present. */
     amd_threshold       : 1,
 
-    __reserved_0        : 60;
+    /* Pentium, family 5-style MCA */
+    p5                  : 1,
+
+    /* Centaur Winchip C6-style MCA */
+    winchip             : 1,
+
+    /* SandyBridge IFU quirk */
+    snb_ifu_quirk       : 1,
+
+    __reserved_0        : 57;
 };
 
 extern struct mce_vendor_flags mce_flags;
 
-struct mca_msr_regs {
-    u32 (*ctl)    (int bank);
-    u32 (*status) (int bank);
-    u32 (*addr)   (int bank);
-    u32 (*misc)   (int bank);
-};
-
-extern struct mca_msr_regs msr_ops;
+enum mca_msr {
+    MCA_CTL,
+    MCA_STATUS,
+    MCA_ADDR,
+    MCA_MISC,
+};
+
+u32 mca_msr_reg(int bank, enum mca_msr reg);
 
 /* Decide whether to add MCE record to MCE event pool or filter it out. */
 extern bool filter_mce(struct mce *m);
@@ -186,4 +193,18 @@ extern bool amd_filter_mce(struct mce *m);
 static inline bool amd_filter_mce(struct mce *m) { return false; }
 #endif
 
+#ifdef CONFIG_X86_ANCIENT_MCE
+void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
+void winchip_mcheck_init(struct cpuinfo_x86 *c);
+noinstr void pentium_machine_check(struct pt_regs *regs);
+noinstr void winchip_machine_check(struct pt_regs *regs);
+static inline void enable_p5_mce(void) { mce_p5_enabled = 1; }
+#else
+static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
+static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
+static inline void enable_p5_mce(void) {}
+static inline void pentium_machine_check(struct pt_regs *regs) {}
+static inline void winchip_machine_check(struct pt_regs *regs) {}
+#endif
+
 #endif /* __X86_MCE_INTERNAL_H__ */
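
The mca_config hunk above ("x86/mce: Sort mca_config members to get rid of unnecessary padding") is plain structure-layout hygiene: putting the large bitfield first and grouping the small members removes alignment holes. A self-contained illustration with toy members (sizes are what typical LP64 compilers produce, not a guarantee):

    #include <stdint.h>
    #include <stdio.h>

    struct scattered {              /* small members interleaved with big ones */
        char     a;                 /* 1 byte + 7 bytes of padding before the u64 */
        uint64_t b;
        char     c;                 /* 1 byte + 3 bytes of padding before the int */
        int      d;
    };

    struct sorted {                 /* largest members first, small ones grouped */
        uint64_t b;
        int      d;
        char     a;
        char     c;                 /* the chars now share what used to be padding */
    };

    int main(void)
    {
        printf("scattered: %zu bytes\n", sizeof(struct scattered)); /* typically 24 */
        printf("sorted:    %zu bytes\n", sizeof(struct sorted));    /* typically 16 */
        return 0;
    }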

arch/x86/kernel/cpu/mce/p5.c

@@ -21,7 +21,7 @@
 int mce_p5_enabled __read_mostly;
 
 /* Machine check handler for Pentium class Intel CPUs: */
-static noinstr void pentium_machine_check(struct pt_regs *regs)
+noinstr void pentium_machine_check(struct pt_regs *regs)
 {
     u32 loaddr, hi, lotype;
@@ -54,10 +54,6 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
     if (!cpu_has(c, X86_FEATURE_MCE))
         return;
 
-    machine_check_vector = pentium_machine_check;
-    /* Make sure the vector pointer is visible before we enable MCEs: */
-    wmb();
-
     /* Read registers before enabling: */
     rdmsr(MSR_IA32_P5_MC_ADDR, l, h);
     rdmsr(MSR_IA32_P5_MC_TYPE, l, h);

arch/x86/kernel/cpu/mce/severity.c

@@ -407,15 +407,14 @@ static int mce_severity_intel(struct mce *m, struct pt_regs *regs,
     }
 }
 
-/* Default to mce_severity_intel */
-int (*mce_severity)(struct mce *m, struct pt_regs *regs, int tolerant, char **msg, bool is_excp) =
-    mce_severity_intel;
-
-void __init mcheck_vendor_init_severity(void)
+int mce_severity(struct mce *m, struct pt_regs *regs, int tolerant, char **msg,
+                 bool is_excp)
 {
     if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
         boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
-        mce_severity = mce_severity_amd;
+        return mce_severity_amd(m, regs, tolerant, msg, is_excp);
+    else
+        return mce_severity_intel(m, regs, tolerant, msg, is_excp);
 }
 
 #ifdef CONFIG_DEBUG_FS

arch/x86/kernel/cpu/mce/winchip.c

@@ -17,7 +17,7 @@
 #include "internal.h"
 
 /* Machine check handler for WinChip C6: */
-static noinstr void winchip_machine_check(struct pt_regs *regs)
+noinstr void winchip_machine_check(struct pt_regs *regs)
 {
     instrumentation_begin();
     pr_emerg("CPU0: Machine Check Exception.\n");
@@ -30,10 +30,6 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c)
 {
     u32 lo, hi;
 
-    machine_check_vector = winchip_machine_check;
-    /* Make sure the vector pointer is visible before we enable MCEs: */
-    wmb();
-
     rdmsr(MSR_IDT_FCR1, lo, hi);
     lo |= (1<<2);   /* Enable EIERRINT (int 18 MCE) */
     lo &= ~(1<<4);  /* Enable MCE */

arch/x86/lib/copy_user_64.S

@@ -234,24 +234,11 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
  */
 SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
     movl %edx,%ecx
-    cmp $X86_TRAP_MC,%eax       /* check if X86_TRAP_MC */
-    je 3f
 1:  rep movsb
 2:  mov %ecx,%eax
     ASM_CLAC
     ret
 
-    /*
-     * Return zero to pretend that this copy succeeded. This
-     * is counter-intuitive, but needed to prevent the code
-     * in lib/iov_iter.c from retrying and running back into
-     * the poison cache line again. The machine check handler
-     * will ensure that a SIGBUS is sent to the task.
-     */
-3:  xorl %eax,%eax
-    ASM_CLAC
-    ret
-
     _ASM_EXTABLE_CPY(1b, 2b)
 SYM_CODE_END(.Lcopy_user_handle_tail)
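
The copy_user_64.S hunk above drops the special-cased "pretend the copy succeeded" exit for #MC; combined with the core.c changes, the second bullet of the pull message becomes visible to userspace roughly as follows. A hedged, userspace-only sketch (not kernel code): a write() whose source buffer contains poisoned memory may now come back as a short count or fail with EFAULT, rather than the task being killed with SIGBUS.

    #include <errno.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Write all of buf, tolerating short writes; returns 0 on success, -1 on error. */
    static int write_all(int fd, const char *buf, size_t len)
    {
        size_t done = 0;

        while (done < len) {
            ssize_t n = write(fd, buf + done, len - done);

            if (n < 0) {
                if (errno == EINTR)
                    continue;
                /* Poison in the source buffer can now surface here as EFAULT. */
                perror("write");
                return -1;
            }
            if (n == 0)             /* no forward progress; give up */
                return -1;
            done += n;              /* short write: resume after the bytes that made it */
        }
        return 0;
    }

    int main(void)
    {
        static const char msg[] = "hello\n";

        return write_all(STDOUT_FILENO, msg, sizeof(msg) - 1) ? 1 : 0;
    }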