2019-05-29 14:18:09 +00:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
2016-07-24 04:51:21 +00:00
|
|
|
/*
|
|
|
|
* NFIT - Machine Check Handler
|
|
|
|
*
|
|
|
|
* Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
|
|
|
|
*/
|
|
|
|
#include <linux/notifier.h>
|
|
|
|
#include <linux/acpi.h>
|
2016-09-30 23:19:29 +00:00
|
|
|
#include <linux/nd.h>
|
2016-07-24 04:51:21 +00:00
|
|
|
#include <asm/mce.h>
|
|
|
|
#include "nfit.h"
|
|
|
|
|
|
|
|
static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
|
|
|
|
void *data)
|
|
|
|
{
|
|
|
|
struct mce *mce = (struct mce *)data;
|
|
|
|
struct acpi_nfit_desc *acpi_desc;
|
|
|
|
struct nfit_spa *nfit_spa;
|
|
|
|
|
2018-10-26 00:37:28 +00:00
|
|
|
/* We only care about uncorrectable memory errors */
|
|
|
|
if (!mce_is_memory_error(mce) || mce_is_correctable(mce))
|
2016-07-24 04:51:21 +00:00
|
|
|
return NOTIFY_DONE;
|
|
|
|
|
2018-10-26 00:37:29 +00:00
|
|
|
/* Verify the address reported in the MCE is valid. */
|
|
|
|
if (!mce_usable_address(mce))
|
|
|
|
return NOTIFY_DONE;
|
|
|
|
|
2016-07-24 04:51:21 +00:00
|
|
|
/*
|
|
|
|
* mce->addr contains the physical addr accessed that caused the
|
|
|
|
* machine check. We need to walk through the list of NFITs, and see
|
|
|
|
* if any of them matches that address, and only then start a scrub.
|
|
|
|
*/
|
|
|
|
mutex_lock(&acpi_desc_lock);
|
|
|
|
list_for_each_entry(acpi_desc, &acpi_descs, list) {
|
|
|
|
struct device *dev = acpi_desc->dev;
|
|
|
|
int found_match = 0;
|
|
|
|
|
|
|
|
mutex_lock(&acpi_desc->init_mutex);
|
|
|
|
list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
|
|
|
|
struct acpi_nfit_system_address *spa = nfit_spa->spa;
|
|
|
|
|
2016-09-02 23:27:30 +00:00
|
|
|
if (nfit_spa_type(spa) != NFIT_SPA_PM)
|
2016-07-24 04:51:21 +00:00
|
|
|
continue;
|
|
|
|
/* find the spa that covers the mce addr */
|
|
|
|
if (spa->address > mce->addr)
|
|
|
|
continue;
|
|
|
|
if ((spa->address + spa->length - 1) < mce->addr)
|
|
|
|
continue;
|
|
|
|
found_match = 1;
|
2018-03-02 12:20:49 +00:00
|
|
|
dev_dbg(dev, "addr in SPA %d (0x%llx, 0x%llx)\n",
|
|
|
|
spa->range_index, spa->address, spa->length);
|
2016-07-24 04:51:21 +00:00
|
|
|
/*
|
|
|
|
* We can break at the first match because we're going
|
|
|
|
* to rescan all the SPA ranges. There shouldn't be any
|
|
|
|
* aliasing anyway.
|
|
|
|
*/
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
mutex_unlock(&acpi_desc->init_mutex);
|
|
|
|
|
2016-09-30 23:19:29 +00:00
|
|
|
if (!found_match)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* If this fails due to an -ENOMEM, there is little we can do */
|
2017-08-23 19:48:26 +00:00
|
|
|
nvdimm_bus_add_badrange(acpi_desc->nvdimm_bus,
|
2016-09-30 23:19:29 +00:00
|
|
|
ALIGN(mce->addr, L1_CACHE_BYTES),
|
|
|
|
L1_CACHE_BYTES);
|
|
|
|
nvdimm_region_notify(nfit_spa->nd_region,
|
|
|
|
NVDIMM_REVALIDATE_POISON);
|
|
|
|
|
|
|
|
if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) {
|
|
|
|
/*
|
|
|
|
* We can ignore an -EBUSY here because if an ARS is
|
|
|
|
* already in progress, just let that be the last
|
|
|
|
* authoritative one
|
|
|
|
*/
|
2017-06-30 02:41:30 +00:00
|
|
|
acpi_nfit_ars_rescan(acpi_desc, 0);
|
2016-09-30 23:19:29 +00:00
|
|
|
}
|
|
|
|
break;
|
2016-07-24 04:51:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
mutex_unlock(&acpi_desc_lock);
|
|
|
|
return NOTIFY_DONE;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct notifier_block nfit_mce_dec = {
|
|
|
|
.notifier_call = nfit_handle_mce,
|
2017-01-23 18:35:14 +00:00
|
|
|
.priority = MCE_PRIO_NFIT,
|
2016-07-24 04:51:21 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
void nfit_mce_register(void)
|
|
|
|
{
|
|
|
|
mce_register_decode_chain(&nfit_mce_dec);
|
|
|
|
}
|
|
|
|
|
|
|
|
void nfit_mce_unregister(void)
|
|
|
|
{
|
|
|
|
mce_unregister_decode_chain(&nfit_mce_dec);
|
|
|
|
}
|