EDAC, skx_edac: Detect non-volatile DIMMs

This just covers the topology function of the EDAC driver. We locate
which DIMM slots are populated with NVDIMMs and query the NFIT and
SMBIOS tables to get the size.

Reviewed-by: Jean Delvare <jdelvare@suse.de>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Cc: Aristeu Rozanski <aris@redhat.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Len Brown <lenb@kernel.org>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: linux-acpi@vger.kernel.org
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: linux-nvdimm@lists.01.org
Link: http://lkml.kernel.org/r/20180312182430.10335-6-tony.luck@intel.com
Signed-off-by: Borislav Petkov <bp@suse.de>
This commit is contained in:
Tony Luck 2018-03-12 11:24:30 -07:00 committed by Borislav Petkov
parent 6deae96b42
commit 58ca9ac146
2 changed files with 65 additions and 7 deletions

View File

@ -232,9 +232,12 @@ config EDAC_SBRIDGE
config EDAC_SKX config EDAC_SKX
tristate "Intel Skylake server Integrated MC" tristate "Intel Skylake server Integrated MC"
depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG
select DMI
help help
Support for error detection and correction the Intel Support for error detection and correction the Intel
Skylake server Integrated Memory Controllers. Skylake server Integrated Memory Controllers. If your
system has non-volatile DIMMs you should also manually
select CONFIG_ACPI_NFIT.
config EDAC_PND2 config EDAC_PND2
tristate "Intel Pondicherry2" tristate "Intel Pondicherry2"

View File

@ -14,6 +14,8 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/acpi.h>
#include <linux/dmi.h>
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/pci_ids.h> #include <linux/pci_ids.h>
#include <linux/slab.h> #include <linux/slab.h>
@ -24,6 +26,7 @@
#include <linux/bitmap.h> #include <linux/bitmap.h>
#include <linux/math64.h> #include <linux/math64.h>
#include <linux/mod_devicetable.h> #include <linux/mod_devicetable.h>
#include <acpi/nfit.h>
#include <asm/cpu_device_id.h> #include <asm/cpu_device_id.h>
#include <asm/intel-family.h> #include <asm/intel-family.h>
#include <asm/processor.h> #include <asm/processor.h>
@ -302,6 +305,7 @@ static int get_dimm_attr(u32 reg, int lobit, int hibit, int add, int minval,
} }
#define IS_DIMM_PRESENT(mtr) GET_BITFIELD((mtr), 15, 15) #define IS_DIMM_PRESENT(mtr) GET_BITFIELD((mtr), 15, 15)
#define IS_NVDIMM_PRESENT(mcddrtcfg, i) GET_BITFIELD((mcddrtcfg), (i), (i))
#define numrank(reg) get_dimm_attr((reg), 12, 13, 0, 0, 2, "ranks") #define numrank(reg) get_dimm_attr((reg), 12, 13, 0, 0, 2, "ranks")
#define numrow(reg) get_dimm_attr((reg), 2, 4, 12, 1, 6, "rows") #define numrow(reg) get_dimm_attr((reg), 2, 4, 12, 1, 6, "rows")
@ -350,8 +354,6 @@ static int get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm,
int banks = 16, ranks, rows, cols, npages; int banks = 16, ranks, rows, cols, npages;
u64 size; u64 size;
if (!IS_DIMM_PRESENT(mtr))
return 0;
ranks = numrank(mtr); ranks = numrank(mtr);
rows = numrow(mtr); rows = numrow(mtr);
cols = numcol(mtr); cols = numcol(mtr);
@ -383,6 +385,54 @@ static int get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm,
return 1; return 1;
} }
static int get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
int chan, int dimmno)
{
int smbios_handle;
u32 dev_handle;
u16 flags;
u64 size = 0;
dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc,
imc->src_id, 0);
smbios_handle = nfit_get_smbios_id(dev_handle, &flags);
if (smbios_handle == -EOPNOTSUPP) {
pr_warn_once(EDAC_MOD_STR ": Can't find size of NVDIMM. Try enabling CONFIG_ACPI_NFIT\n");
goto unknown_size;
}
if (smbios_handle < 0) {
skx_printk(KERN_ERR, "Can't find handle for NVDIMM ADR=%x\n", dev_handle);
goto unknown_size;
}
if (flags & ACPI_NFIT_MEM_MAP_FAILED) {
skx_printk(KERN_ERR, "NVDIMM ADR=%x is not mapped\n", dev_handle);
goto unknown_size;
}
size = dmi_memdev_size(smbios_handle);
if (size == ~0ull)
skx_printk(KERN_ERR, "Can't find size for NVDIMM ADR=%x/SMBIOS=%x\n",
dev_handle, smbios_handle);
unknown_size:
dimm->nr_pages = size >> PAGE_SHIFT;
dimm->grain = 32;
dimm->dtype = DEV_UNKNOWN;
dimm->mtype = MEM_NVDIMM;
dimm->edac_mode = EDAC_SECDED; /* likely better than this */
edac_dbg(0, "mc#%d: channel %d, dimm %d, %llu Mb (%u pages)\n",
imc->mc, chan, dimmno, size >> 20, dimm->nr_pages);
snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
imc->src_id, imc->lmc, chan, dimmno);
return (size == 0 || size == ~0ull) ? 0 : 1;
}
#define SKX_GET_MTMTR(dev, reg) \ #define SKX_GET_MTMTR(dev, reg) \
pci_read_config_dword((dev), 0x87c, &reg) pci_read_config_dword((dev), 0x87c, &reg)
@ -399,20 +449,24 @@ static int skx_get_dimm_config(struct mem_ctl_info *mci)
{ {
struct skx_pvt *pvt = mci->pvt_info; struct skx_pvt *pvt = mci->pvt_info;
struct skx_imc *imc = pvt->imc; struct skx_imc *imc = pvt->imc;
u32 mtr, amap, mcddrtcfg;
struct dimm_info *dimm; struct dimm_info *dimm;
int i, j; int i, j;
u32 mtr, amap;
int ndimms; int ndimms;
for (i = 0; i < NUM_CHANNELS; i++) { for (i = 0; i < NUM_CHANNELS; i++) {
ndimms = 0; ndimms = 0;
pci_read_config_dword(imc->chan[i].cdev, 0x8C, &amap); pci_read_config_dword(imc->chan[i].cdev, 0x8C, &amap);
pci_read_config_dword(imc->chan[i].cdev, 0x400, &mcddrtcfg);
for (j = 0; j < NUM_DIMMS; j++) { for (j = 0; j < NUM_DIMMS; j++) {
dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
mci->n_layers, i, j, 0); mci->n_layers, i, j, 0);
pci_read_config_dword(imc->chan[i].cdev, pci_read_config_dword(imc->chan[i].cdev,
0x80 + 4*j, &mtr); 0x80 + 4*j, &mtr);
ndimms += get_dimm_info(mtr, amap, dimm, imc, i, j); if (IS_DIMM_PRESENT(mtr))
ndimms += get_dimm_info(mtr, amap, dimm, imc, i, j);
else if (IS_NVDIMM_PRESENT(mcddrtcfg, j))
ndimms += get_nvdimm_info(dimm, imc, i, j);
} }
if (ndimms && !skx_check_ecc(imc->chan[0].cdev)) { if (ndimms && !skx_check_ecc(imc->chan[0].cdev)) {
skx_printk(KERN_ERR, "ECC is disabled on imc %d\n", imc->mc); skx_printk(KERN_ERR, "ECC is disabled on imc %d\n", imc->mc);
@ -468,13 +522,14 @@ static int skx_register_mci(struct skx_imc *imc)
pvt = mci->pvt_info; pvt = mci->pvt_info;
pvt->imc = imc; pvt->imc = imc;
mci->ctl_name = kasprintf(GFP_KERNEL, "Skylake Socket#%d IMC#%d", imc->node_id, imc->lmc); mci->ctl_name = kasprintf(GFP_KERNEL, "Skylake Socket#%d IMC#%d",
imc->node_id, imc->lmc);
if (!mci->ctl_name) { if (!mci->ctl_name) {
rc = -ENOMEM; rc = -ENOMEM;
goto fail0; goto fail0;
} }
mci->mtype_cap = MEM_FLAG_DDR4; mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_NVDIMM;
mci->edac_ctl_cap = EDAC_FLAG_NONE; mci->edac_ctl_cap = EDAC_FLAG_NONE;
mci->edac_cap = EDAC_FLAG_NONE; mci->edac_cap = EDAC_FLAG_NONE;
mci->mod_name = EDAC_MOD_STR; mci->mod_name = EDAC_MOD_STR;