diff --git a/MAINTAINERS b/MAINTAINERS index 4f004dc0a0f2..f835946d78ce 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4643,10 +4643,11 @@ S: Maintained F: drivers/i2c/busses/i2c-diolan-u2c.c FILESYSTEM DIRECT ACCESS (DAX) -M: Matthew Wilcox -M: Ross Zwisler -M: Jan Kara +M: Dan Williams +R: Matthew Wilcox +R: Jan Kara L: linux-fsdevel@vger.kernel.org +L: linux-nvdimm@lists.01.org S: Supported F: fs/dax.c F: include/linux/dax.h @@ -4654,9 +4655,9 @@ F: include/trace/events/fs_dax.h DEVICE DIRECT ACCESS (DAX) M: Dan Williams -M: Dave Jiang -M: Ross Zwisler M: Vishal Verma +M: Keith Busch +M: Dave Jiang L: linux-nvdimm@lists.01.org S: Supported F: drivers/dax/ @@ -8812,7 +8813,6 @@ S: Maintained F: tools/lib/lockdep/ LIBNVDIMM BLK: MMIO-APERTURE DRIVER -M: Ross Zwisler M: Dan Williams M: Vishal Verma M: Dave Jiang @@ -8825,7 +8825,6 @@ F: drivers/nvdimm/region_devs.c LIBNVDIMM BTT: BLOCK TRANSLATION TABLE M: Vishal Verma M: Dan Williams -M: Ross Zwisler M: Dave Jiang L: linux-nvdimm@lists.01.org Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ @@ -8833,7 +8832,6 @@ S: Supported F: drivers/nvdimm/btt* LIBNVDIMM PMEM: PERSISTENT MEMORY DRIVER -M: Ross Zwisler M: Dan Williams M: Vishal Verma M: Dave Jiang @@ -8852,9 +8850,10 @@ F: Documentation/devicetree/bindings/pmem/pmem-region.txt LIBNVDIMM: NON-VOLATILE MEMORY DEVICE SUBSYSTEM M: Dan Williams -M: Ross Zwisler M: Vishal Verma M: Dave Jiang +M: Keith Busch +M: Ira Weiny L: linux-nvdimm@lists.01.org Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm.git diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index e18ade5d74e9..df8979008dd4 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -55,6 +55,10 @@ static bool no_init_ars; module_param(no_init_ars, bool, 0644); MODULE_PARM_DESC(no_init_ars, "Skip ARS run at nfit init time"); +static bool force_labels; +module_param(force_labels, bool, 0444); +MODULE_PARM_DESC(force_labels, "Opt-in to labels despite missing methods"); + LIST_HEAD(acpi_descs); DEFINE_MUTEX(acpi_desc_lock); @@ -415,7 +419,7 @@ static int cmd_to_func(struct nfit_mem *nfit_mem, unsigned int cmd, if (call_pkg) { int i; - if (nfit_mem->family != call_pkg->nd_family) + if (nfit_mem && nfit_mem->family != call_pkg->nd_family) return -ENOTTY; for (i = 0; i < ARRAY_SIZE(call_pkg->nd_reserved2); i++) @@ -424,6 +428,10 @@ static int cmd_to_func(struct nfit_mem *nfit_mem, unsigned int cmd, return call_pkg->nd_command; } + /* In the !call_pkg case, bus commands == bus functions */ + if (!nfit_mem) + return cmd; + /* Linux ND commands == NVDIMM_FAMILY_INTEL function numbers */ if (nfit_mem->family == NVDIMM_FAMILY_INTEL) return cmd; @@ -454,17 +462,18 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, if (cmd_rc) *cmd_rc = -EINVAL; + if (cmd == ND_CMD_CALL) + call_pkg = buf; + func = cmd_to_func(nfit_mem, cmd, call_pkg); + if (func < 0) + return func; + if (nvdimm) { struct acpi_device *adev = nfit_mem->adev; if (!adev) return -ENOTTY; - if (cmd == ND_CMD_CALL) - call_pkg = buf; - func = cmd_to_func(nfit_mem, cmd, call_pkg); - if (func < 0) - return func; dimm_name = nvdimm_name(nvdimm); cmd_name = nvdimm_cmd_name(cmd); cmd_mask = nvdimm_cmd_mask(nvdimm); @@ -475,12 +484,9 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, } else { struct acpi_device *adev = to_acpi_dev(acpi_desc); - func = cmd; cmd_name = nvdimm_bus_cmd_name(cmd); cmd_mask = nd_desc->cmd_mask; - dsm_mask = cmd_mask; - if (cmd == ND_CMD_CALL) - dsm_mask = nd_desc->bus_dsm_mask; + dsm_mask = nd_desc->bus_dsm_mask; desc = nd_cmd_bus_desc(cmd); guid = to_nfit_uuid(NFIT_DEV_BUS); handle = adev->handle; @@ -554,6 +560,13 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, return -EINVAL; } + if (out_obj->type != ACPI_TYPE_BUFFER) { + dev_dbg(dev, "%s unexpected output object type cmd: %s type: %d\n", + dimm_name, cmd_name, out_obj->type); + rc = -EINVAL; + goto out; + } + if (call_pkg) { call_pkg->nd_fw_size = out_obj->buffer.length; memcpy(call_pkg->nd_payload + call_pkg->nd_size_in, @@ -572,13 +585,6 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, return 0; } - if (out_obj->package.type != ACPI_TYPE_BUFFER) { - dev_dbg(dev, "%s unexpected output object type cmd: %s type: %d\n", - dimm_name, cmd_name, out_obj->type); - rc = -EINVAL; - goto out; - } - dev_dbg(dev, "%s cmd: %s output length: %d\n", dimm_name, cmd_name, out_obj->buffer.length); print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, 4, @@ -1317,19 +1323,30 @@ static ssize_t scrub_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nvdimm_bus_descriptor *nd_desc; + struct acpi_nfit_desc *acpi_desc; ssize_t rc = -ENXIO; + bool busy; device_lock(dev); nd_desc = dev_get_drvdata(dev); - if (nd_desc) { - struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); - - mutex_lock(&acpi_desc->init_mutex); - rc = sprintf(buf, "%d%s", acpi_desc->scrub_count, - acpi_desc->scrub_busy - && !acpi_desc->cancel ? "+\n" : "\n"); - mutex_unlock(&acpi_desc->init_mutex); + if (!nd_desc) { + device_unlock(dev); + return rc; } + acpi_desc = to_acpi_desc(nd_desc); + + mutex_lock(&acpi_desc->init_mutex); + busy = test_bit(ARS_BUSY, &acpi_desc->scrub_flags) + && !test_bit(ARS_CANCEL, &acpi_desc->scrub_flags); + rc = sprintf(buf, "%d%s", acpi_desc->scrub_count, busy ? "+\n" : "\n"); + /* Allow an admin to poll the busy state at a higher rate */ + if (busy && capable(CAP_SYS_RAWIO) && !test_and_set_bit(ARS_POLL, + &acpi_desc->scrub_flags)) { + acpi_desc->scrub_tmo = 1; + mod_delayed_work(nfit_wq, &acpi_desc->dwork, HZ); + } + + mutex_unlock(&acpi_desc->init_mutex); device_unlock(dev); return rc; } @@ -1759,14 +1776,14 @@ static bool acpi_nvdimm_has_method(struct acpi_device *adev, char *method) __weak void nfit_intel_shutdown_status(struct nfit_mem *nfit_mem) { + struct device *dev = &nfit_mem->adev->dev; struct nd_intel_smart smart = { 0 }; union acpi_object in_buf = { - .type = ACPI_TYPE_BUFFER, - .buffer.pointer = (char *) &smart, - .buffer.length = sizeof(smart), + .buffer.type = ACPI_TYPE_BUFFER, + .buffer.length = 0, }; union acpi_object in_obj = { - .type = ACPI_TYPE_PACKAGE, + .package.type = ACPI_TYPE_PACKAGE, .package.count = 1, .package.elements = &in_buf, }; @@ -1781,8 +1798,15 @@ __weak void nfit_intel_shutdown_status(struct nfit_mem *nfit_mem) return; out_obj = acpi_evaluate_dsm(handle, guid, revid, func, &in_obj); - if (!out_obj) + if (!out_obj || out_obj->type != ACPI_TYPE_BUFFER + || out_obj->buffer.length < sizeof(smart)) { + dev_dbg(dev->parent, "%s: failed to retrieve initial health\n", + dev_name(dev)); + ACPI_FREE(out_obj); return; + } + memcpy(&smart, out_obj->buffer.pointer, sizeof(smart)); + ACPI_FREE(out_obj); if (smart.flags & ND_INTEL_SMART_SHUTDOWN_VALID) { if (smart.shutdown_state) @@ -1793,7 +1817,6 @@ __weak void nfit_intel_shutdown_status(struct nfit_mem *nfit_mem) set_bit(NFIT_MEM_DIRTY_COUNT, &nfit_mem->flags); nfit_mem->dirty_shutdown = smart.shutdown_count; } - ACPI_FREE(out_obj); } static void populate_shutdown_status(struct nfit_mem *nfit_mem) @@ -1861,9 +1884,17 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, dev_set_drvdata(&adev_dimm->dev, nfit_mem); /* - * Until standardization materializes we need to consider 4 - * different command sets. Note, that checking for function0 (bit0) - * tells us if any commands are reachable through this GUID. + * There are 4 "legacy" NVDIMM command sets + * (NVDIMM_FAMILY_{INTEL,MSFT,HPE1,HPE2}) that were created before + * an EFI working group was established to constrain this + * proliferation. The nfit driver probes for the supported command + * set by GUID. Note, if you're a platform developer looking to add + * a new command set to this probe, consider using an existing set, + * or otherwise seek approval to publish the command set at + * http://www.uefi.org/RFIC_LIST. + * + * Note, that checking for function0 (bit0) tells us if any commands + * are reachable through this GUID. */ for (i = 0; i <= NVDIMM_FAMILY_MAX; i++) if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1)) @@ -1886,6 +1917,8 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, dsm_mask &= ~(1 << 8); } else if (nfit_mem->family == NVDIMM_FAMILY_MSFT) { dsm_mask = 0xffffffff; + } else if (nfit_mem->family == NVDIMM_FAMILY_HYPERV) { + dsm_mask = 0x1f; } else { dev_dbg(dev, "unknown dimm command family\n"); nfit_mem->family = -1; @@ -1915,18 +1948,32 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, | 1 << ND_CMD_SET_CONFIG_DATA; if (family == NVDIMM_FAMILY_INTEL && (dsm_mask & label_mask) == label_mask) - return 0; + /* skip _LS{I,R,W} enabling */; + else { + if (acpi_nvdimm_has_method(adev_dimm, "_LSI") + && acpi_nvdimm_has_method(adev_dimm, "_LSR")) { + dev_dbg(dev, "%s: has _LSR\n", dev_name(&adev_dimm->dev)); + set_bit(NFIT_MEM_LSR, &nfit_mem->flags); + } - if (acpi_nvdimm_has_method(adev_dimm, "_LSI") - && acpi_nvdimm_has_method(adev_dimm, "_LSR")) { - dev_dbg(dev, "%s: has _LSR\n", dev_name(&adev_dimm->dev)); - set_bit(NFIT_MEM_LSR, &nfit_mem->flags); - } + if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags) + && acpi_nvdimm_has_method(adev_dimm, "_LSW")) { + dev_dbg(dev, "%s: has _LSW\n", dev_name(&adev_dimm->dev)); + set_bit(NFIT_MEM_LSW, &nfit_mem->flags); + } - if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags) - && acpi_nvdimm_has_method(adev_dimm, "_LSW")) { - dev_dbg(dev, "%s: has _LSW\n", dev_name(&adev_dimm->dev)); - set_bit(NFIT_MEM_LSW, &nfit_mem->flags); + /* + * Quirk read-only label configurations to preserve + * access to label-less namespaces by default. + */ + if (!test_bit(NFIT_MEM_LSW, &nfit_mem->flags) + && !force_labels) { + dev_dbg(dev, "%s: No _LSW, disable labels\n", + dev_name(&adev_dimm->dev)); + clear_bit(NFIT_MEM_LSR, &nfit_mem->flags); + } else + dev_dbg(dev, "%s: Force enable labels\n", + dev_name(&adev_dimm->dev)); } populate_shutdown_status(nfit_mem); @@ -2027,6 +2074,10 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) cmd_mask |= nfit_mem->dsm_mask & NVDIMM_STANDARD_CMDMASK; } + /* Quirk to ignore LOCAL for labels on HYPERV DIMMs */ + if (nfit_mem->family == NVDIMM_FAMILY_HYPERV) + set_bit(NDD_NOBLK, &flags); + if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags)) { set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask); set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask); @@ -2050,7 +2101,7 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) if ((mem_flags & ACPI_NFIT_MEM_FAILED_MASK) == 0) continue; - dev_info(acpi_desc->dev, "%s flags:%s%s%s%s%s\n", + dev_err(acpi_desc->dev, "Error found in NVDIMM %s flags:%s%s%s%s%s\n", nvdimm_name(nvdimm), mem_flags & ACPI_NFIT_MEM_SAVE_FAILED ? " save_fail" : "", mem_flags & ACPI_NFIT_MEM_RESTORE_FAILED ? " restore_fail":"", @@ -2641,7 +2692,10 @@ static int ars_start(struct acpi_nfit_desc *acpi_desc, if (rc < 0) return rc; - return cmd_rc; + if (cmd_rc < 0) + return cmd_rc; + set_bit(ARS_VALID, &acpi_desc->scrub_flags); + return 0; } static int ars_continue(struct acpi_nfit_desc *acpi_desc) @@ -2651,11 +2705,11 @@ static int ars_continue(struct acpi_nfit_desc *acpi_desc) struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status; - memset(&ars_start, 0, sizeof(ars_start)); - ars_start.address = ars_status->restart_address; - ars_start.length = ars_status->restart_length; - ars_start.type = ars_status->type; - ars_start.flags = acpi_desc->ars_start_flags; + ars_start = (struct nd_cmd_ars_start) { + .address = ars_status->restart_address, + .length = ars_status->restart_length, + .type = ars_status->type, + }; rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start, sizeof(ars_start), &cmd_rc); if (rc < 0) @@ -2734,6 +2788,17 @@ static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc) */ if (ars_status->out_length < 44) return 0; + + /* + * Ignore potentially stale results that are only refreshed + * after a start-ARS event. + */ + if (!test_and_clear_bit(ARS_VALID, &acpi_desc->scrub_flags)) { + dev_dbg(acpi_desc->dev, "skip %d stale records\n", + ars_status->num_records); + return 0; + } + for (i = 0; i < ars_status->num_records; i++) { /* only process full records */ if (ars_status->out_length @@ -3004,14 +3069,16 @@ static int ars_register(struct acpi_nfit_desc *acpi_desc, { int rc; - if (no_init_ars || test_bit(ARS_FAILED, &nfit_spa->ars_state)) + if (test_bit(ARS_FAILED, &nfit_spa->ars_state)) return acpi_nfit_register_region(acpi_desc, nfit_spa); set_bit(ARS_REQ_SHORT, &nfit_spa->ars_state); - set_bit(ARS_REQ_LONG, &nfit_spa->ars_state); + if (!no_init_ars) + set_bit(ARS_REQ_LONG, &nfit_spa->ars_state); switch (acpi_nfit_query_poison(acpi_desc)) { case 0: + case -ENOSPC: case -EAGAIN: rc = ars_start(acpi_desc, nfit_spa, ARS_REQ_SHORT); /* shouldn't happen, try again later */ @@ -3036,7 +3103,6 @@ static int ars_register(struct acpi_nfit_desc *acpi_desc, break; case -EBUSY: case -ENOMEM: - case -ENOSPC: /* * BIOS was using ARS, wait for it to complete (or * resources to become available) and then perform our @@ -3071,7 +3137,7 @@ static unsigned int __acpi_nfit_scrub(struct acpi_nfit_desc *acpi_desc, lockdep_assert_held(&acpi_desc->init_mutex); - if (acpi_desc->cancel) + if (test_bit(ARS_CANCEL, &acpi_desc->scrub_flags)) return 0; if (query_rc == -EBUSY) { @@ -3145,7 +3211,7 @@ static void __sched_ars(struct acpi_nfit_desc *acpi_desc, unsigned int tmo) { lockdep_assert_held(&acpi_desc->init_mutex); - acpi_desc->scrub_busy = 1; + set_bit(ARS_BUSY, &acpi_desc->scrub_flags); /* note this should only be set from within the workqueue */ if (tmo) acpi_desc->scrub_tmo = tmo; @@ -3161,7 +3227,7 @@ static void notify_ars_done(struct acpi_nfit_desc *acpi_desc) { lockdep_assert_held(&acpi_desc->init_mutex); - acpi_desc->scrub_busy = 0; + clear_bit(ARS_BUSY, &acpi_desc->scrub_flags); acpi_desc->scrub_count++; if (acpi_desc->scrub_count_state) sysfs_notify_dirent(acpi_desc->scrub_count_state); @@ -3182,6 +3248,7 @@ static void acpi_nfit_scrub(struct work_struct *work) else notify_ars_done(acpi_desc); memset(acpi_desc->ars_status, 0, acpi_desc->max_ars); + clear_bit(ARS_POLL, &acpi_desc->scrub_flags); mutex_unlock(&acpi_desc->init_mutex); } @@ -3216,6 +3283,7 @@ static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc) struct nfit_spa *nfit_spa; int rc; + set_bit(ARS_VALID, &acpi_desc->scrub_flags); list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { switch (nfit_spa_type(nfit_spa->spa)) { case NFIT_SPA_VOLATILE: @@ -3450,7 +3518,7 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa; mutex_lock(&acpi_desc->init_mutex); - if (acpi_desc->cancel) { + if (test_bit(ARS_CANCEL, &acpi_desc->scrub_flags)) { mutex_unlock(&acpi_desc->init_mutex); return 0; } @@ -3529,7 +3597,7 @@ void acpi_nfit_shutdown(void *data) mutex_unlock(&acpi_desc_lock); mutex_lock(&acpi_desc->init_mutex); - acpi_desc->cancel = 1; + set_bit(ARS_CANCEL, &acpi_desc->scrub_flags); cancel_delayed_work_sync(&acpi_desc->dwork); mutex_unlock(&acpi_desc->init_mutex); @@ -3729,6 +3797,7 @@ static __init int nfit_init(void) guid_parse(UUID_NFIT_DIMM_N_HPE1, &nfit_uuid[NFIT_DEV_DIMM_N_HPE1]); guid_parse(UUID_NFIT_DIMM_N_HPE2, &nfit_uuid[NFIT_DEV_DIMM_N_HPE2]); guid_parse(UUID_NFIT_DIMM_N_MSFT, &nfit_uuid[NFIT_DEV_DIMM_N_MSFT]); + guid_parse(UUID_NFIT_DIMM_N_HYPERV, &nfit_uuid[NFIT_DEV_DIMM_N_HYPERV]); nfit_wq = create_singlethread_workqueue("nfit"); if (!nfit_wq) diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index 33691aecfcee..2f8cf2a11e3b 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -34,11 +34,14 @@ /* https://msdn.microsoft.com/library/windows/hardware/mt604741 */ #define UUID_NFIT_DIMM_N_MSFT "1ee68b36-d4bd-4a1a-9a16-4f8e53d46e05" +/* http://www.uefi.org/RFIC_LIST (see "Virtual NVDIMM 0x1901") */ +#define UUID_NFIT_DIMM_N_HYPERV "5746c5f2-a9a2-4264-ad0e-e4ddc9e09e80" + #define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \ | ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \ | ACPI_NFIT_MEM_NOT_ARMED | ACPI_NFIT_MEM_MAP_FAILED) -#define NVDIMM_FAMILY_MAX NVDIMM_FAMILY_MSFT +#define NVDIMM_FAMILY_MAX NVDIMM_FAMILY_HYPERV #define NVDIMM_STANDARD_CMDMASK \ (1 << ND_CMD_SMART | 1 << ND_CMD_SMART_THRESHOLD | 1 << ND_CMD_DIMM_FLAGS \ @@ -94,6 +97,7 @@ enum nfit_uuids { NFIT_DEV_DIMM_N_HPE1 = NVDIMM_FAMILY_HPE1, NFIT_DEV_DIMM_N_HPE2 = NVDIMM_FAMILY_HPE2, NFIT_DEV_DIMM_N_MSFT = NVDIMM_FAMILY_MSFT, + NFIT_DEV_DIMM_N_HYPERV = NVDIMM_FAMILY_HYPERV, NFIT_SPA_VOLATILE, NFIT_SPA_PM, NFIT_SPA_DCR, @@ -210,6 +214,13 @@ struct nfit_mem { int family; }; +enum scrub_flags { + ARS_BUSY, + ARS_CANCEL, + ARS_VALID, + ARS_POLL, +}; + struct acpi_nfit_desc { struct nvdimm_bus_descriptor nd_desc; struct acpi_table_header acpi_header; @@ -223,7 +234,6 @@ struct acpi_nfit_desc { struct list_head idts; struct nvdimm_bus *nvdimm_bus; struct device *dev; - u8 ars_start_flags; struct nd_cmd_ars_status *ars_status; struct nfit_spa *scrub_spa; struct delayed_work dwork; @@ -232,8 +242,7 @@ struct acpi_nfit_desc { unsigned int max_ars; unsigned int scrub_count; unsigned int scrub_mode; - unsigned int scrub_busy:1; - unsigned int cancel:1; + unsigned long scrub_flags; unsigned long dimm_cmd_force_en; unsigned long bus_cmd_force_en; unsigned long bus_nfit_cmd_force_en; diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 6e928f37d084..0cb8c30ea278 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -86,12 +86,14 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize) { struct dax_device *dax_dev; bool dax_enabled = false; + pgoff_t pgoff, pgoff_end; struct request_queue *q; - pgoff_t pgoff; - int err, id; - pfn_t pfn; - long len; char buf[BDEVNAME_SIZE]; + void *kaddr, *end_kaddr; + pfn_t pfn, end_pfn; + sector_t last_page; + long len, len2; + int err, id; if (blocksize != PAGE_SIZE) { pr_debug("%s: error: unsupported blocksize for dax\n", @@ -113,6 +115,14 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize) return false; } + last_page = PFN_DOWN(i_size_read(bdev->bd_inode) - 1) * 8; + err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end); + if (err) { + pr_debug("%s: error: unaligned partition for dax\n", + bdevname(bdev, buf)); + return false; + } + dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); if (!dax_dev) { pr_debug("%s: error: device does not support dax\n", @@ -121,14 +131,15 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize) } id = dax_read_lock(); - len = dax_direct_access(dax_dev, pgoff, 1, NULL, &pfn); + len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn); + len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn); dax_read_unlock(id); put_dax(dax_dev); - if (len < 1) { + if (len < 1 || len2 < 1) { pr_debug("%s: error: dax access failed (%ld)\n", - bdevname(bdev, buf), len); + bdevname(bdev, buf), len < 1 ? len : len2); return false; } @@ -143,13 +154,20 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize) */ WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API)); dax_enabled = true; - } else if (pfn_t_devmap(pfn)) { - struct dev_pagemap *pgmap; + } else if (pfn_t_devmap(pfn) && pfn_t_devmap(end_pfn)) { + struct dev_pagemap *pgmap, *end_pgmap; pgmap = get_dev_pagemap(pfn_t_to_pfn(pfn), NULL); - if (pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX) + end_pgmap = get_dev_pagemap(pfn_t_to_pfn(end_pfn), NULL); + if (pgmap && pgmap == end_pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX + && pfn_t_to_page(pfn)->pgmap == pgmap + && pfn_t_to_page(end_pfn)->pgmap == pgmap + && pfn_t_to_pfn(pfn) == PHYS_PFN(__pa(kaddr)) + && pfn_t_to_pfn(end_pfn) == PHYS_PFN(__pa(end_kaddr))) dax_enabled = true; put_dev_pagemap(pgmap); + put_dev_pagemap(end_pgmap); + } if (!dax_enabled) { diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index b123b0dcf274..4671776f5623 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -541,9 +541,9 @@ static int arena_clear_freelist_error(struct arena_info *arena, u32 lane) static int btt_freelist_init(struct arena_info *arena) { - int old, new, ret; - u32 i, map_entry; - struct log_entry log_new, log_old; + int new, ret; + struct log_entry log_new; + u32 i, map_entry, log_oldmap, log_newmap; arena->freelist = kcalloc(arena->nfree, sizeof(struct free_entry), GFP_KERNEL); @@ -551,24 +551,26 @@ static int btt_freelist_init(struct arena_info *arena) return -ENOMEM; for (i = 0; i < arena->nfree; i++) { - old = btt_log_read(arena, i, &log_old, LOG_OLD_ENT); - if (old < 0) - return old; - new = btt_log_read(arena, i, &log_new, LOG_NEW_ENT); if (new < 0) return new; + /* old and new map entries with any flags stripped out */ + log_oldmap = ent_lba(le32_to_cpu(log_new.old_map)); + log_newmap = ent_lba(le32_to_cpu(log_new.new_map)); + /* sub points to the next one to be overwritten */ arena->freelist[i].sub = 1 - new; arena->freelist[i].seq = nd_inc_seq(le32_to_cpu(log_new.seq)); - arena->freelist[i].block = le32_to_cpu(log_new.old_map); + arena->freelist[i].block = log_oldmap; /* * FIXME: if error clearing fails during init, we want to make * the BTT read-only */ - if (ent_e_flag(log_new.old_map)) { + if (ent_e_flag(log_new.old_map) && + !ent_normal(log_new.old_map)) { + arena->freelist[i].has_err = 1; ret = arena_clear_freelist_error(arena, i); if (ret) dev_err_ratelimited(to_dev(arena), @@ -576,7 +578,7 @@ static int btt_freelist_init(struct arena_info *arena) } /* This implies a newly created or untouched flog entry */ - if (log_new.old_map == log_new.new_map) + if (log_oldmap == log_newmap) continue; /* Check if map recovery is needed */ @@ -584,8 +586,15 @@ static int btt_freelist_init(struct arena_info *arena) NULL, NULL, 0); if (ret) return ret; - if ((le32_to_cpu(log_new.new_map) != map_entry) && - (le32_to_cpu(log_new.old_map) == map_entry)) { + + /* + * The map_entry from btt_read_map is stripped of any flag bits, + * so use the stripped out versions from the log as well for + * testing whether recovery is needed. For restoration, use the + * 'raw' version of the log entries as that captured what we + * were going to write originally. + */ + if ((log_newmap != map_entry) && (log_oldmap == map_entry)) { /* * Last transaction wrote the flog, but wasn't able * to complete the map write. So fix up the map. diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h index db3cb6d4d0d4..ddff49c707b0 100644 --- a/drivers/nvdimm/btt.h +++ b/drivers/nvdimm/btt.h @@ -44,6 +44,8 @@ #define ent_e_flag(ent) (!!(ent & MAP_ERR_MASK)) #define ent_z_flag(ent) (!!(ent & MAP_TRIM_MASK)) #define set_e_flag(ent) (ent |= MAP_ERR_MASK) +/* 'normal' is both e and z flags set */ +#define ent_normal(ent) (ent_e_flag(ent) && ent_z_flag(ent)) enum btt_init_state { INIT_UNCHECKED = 0, diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 795ad4ff35ca..b72a303176c7 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -159,11 +159,19 @@ static ssize_t size_show(struct device *dev, } static DEVICE_ATTR_RO(size); +static ssize_t log_zero_flags_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Y\n"); +} +static DEVICE_ATTR_RO(log_zero_flags); + static struct attribute *nd_btt_attributes[] = { &dev_attr_sector_size.attr, &dev_attr_namespace.attr, &dev_attr_uuid.attr, &dev_attr_size.attr, + &dev_attr_log_zero_flags.attr, NULL, }; diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index efe412a6b5b9..91b9abbf689c 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -11,6 +11,7 @@ * General Public License for more details. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -25,6 +26,10 @@ static DEFINE_IDA(dimm_ida); +static bool noblk; +module_param(noblk, bool, 0444); +MODULE_PARM_DESC(noblk, "force disable BLK / local alias support"); + /* * Retrieve bus and dimm handle and return if this bus supports * get_config_data commands @@ -551,6 +556,8 @@ struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus, nvdimm->dimm_id = dimm_id; nvdimm->provider_data = provider_data; + if (noblk) + flags |= 1 << NDD_NOBLK; nvdimm->flags = flags; nvdimm->cmd_mask = cmd_mask; nvdimm->num_flush = num_flush; diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index a11bf4e6b451..f3d753d3169c 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -392,6 +392,7 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd) return 0; /* no label, nothing to reserve */ for_each_clear_bit_le(slot, free, nslot) { + struct nvdimm *nvdimm = to_nvdimm(ndd->dev); struct nd_namespace_label *nd_label; struct nd_region *nd_region = NULL; u8 label_uuid[NSLABEL_UUID_LEN]; @@ -406,6 +407,8 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd) memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN); flags = __le32_to_cpu(nd_label->flags); + if (test_bit(NDD_NOBLK, &nvdimm->flags)) + flags &= ~NSLABEL_FLAG_LOCAL; nd_label_gen_id(&label_id, label_uuid, flags); res = nvdimm_allocate_dpa(ndd, &label_id, __le64_to_cpu(nd_label->dpa), @@ -755,7 +758,7 @@ static const guid_t *to_abstraction_guid(enum nvdimm_claim_class claim_class, static int __pmem_label_update(struct nd_region *nd_region, struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm, - int pos) + int pos, unsigned long flags) { struct nd_namespace_common *ndns = &nspm->nsio.common; struct nd_interleave_set *nd_set = nd_region->nd_set; @@ -796,7 +799,7 @@ static int __pmem_label_update(struct nd_region *nd_region, memcpy(nd_label->uuid, nspm->uuid, NSLABEL_UUID_LEN); if (nspm->alt_name) memcpy(nd_label->name, nspm->alt_name, NSLABEL_NAME_LEN); - nd_label->flags = __cpu_to_le32(NSLABEL_FLAG_UPDATING); + nd_label->flags = __cpu_to_le32(flags); nd_label->nlabel = __cpu_to_le16(nd_region->ndr_mappings); nd_label->position = __cpu_to_le16(pos); nd_label->isetcookie = __cpu_to_le64(cookie); @@ -1249,13 +1252,13 @@ static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid) int nd_pmem_namespace_label_update(struct nd_region *nd_region, struct nd_namespace_pmem *nspm, resource_size_t size) { - int i; + int i, rc; for (i = 0; i < nd_region->ndr_mappings; i++) { struct nd_mapping *nd_mapping = &nd_region->mapping[i]; struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); struct resource *res; - int rc, count = 0; + int count = 0; if (size == 0) { rc = del_labels(nd_mapping, nspm->uuid); @@ -1273,7 +1276,20 @@ int nd_pmem_namespace_label_update(struct nd_region *nd_region, if (rc < 0) return rc; - rc = __pmem_label_update(nd_region, nd_mapping, nspm, i); + rc = __pmem_label_update(nd_region, nd_mapping, nspm, i, + NSLABEL_FLAG_UPDATING); + if (rc) + return rc; + } + + if (size == 0) + return 0; + + /* Clear the UPDATING flag per UEFI 2.7 expectations */ + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + + rc = __pmem_label_update(nd_region, nd_mapping, nspm, i, 0); if (rc) return rc; } diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 4b077555ac70..7849bf1812c4 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -138,6 +138,7 @@ bool nd_is_uuid_unique(struct device *dev, u8 *uuid) bool pmem_should_map_pages(struct device *dev) { struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_namespace_common *ndns = to_ndns(dev); struct nd_namespace_io *nsio; if (!IS_ENABLED(CONFIG_ZONE_DEVICE)) @@ -149,6 +150,9 @@ bool pmem_should_map_pages(struct device *dev) if (is_nd_pfn(dev) || is_nd_btt(dev)) return false; + if (ndns->force_raw) + return false; + nsio = to_nd_namespace_io(dev); if (region_intersects(nsio->res.start, resource_size(&nsio->res), IORESOURCE_SYSTEM_RAM, @@ -1506,13 +1510,13 @@ static ssize_t __holder_class_store(struct device *dev, const char *buf) if (dev->driver || ndns->claim) return -EBUSY; - if (strcmp(buf, "btt") == 0 || strcmp(buf, "btt\n") == 0) + if (sysfs_streq(buf, "btt")) ndns->claim_class = btt_claim_class(dev); - else if (strcmp(buf, "pfn") == 0 || strcmp(buf, "pfn\n") == 0) + else if (sysfs_streq(buf, "pfn")) ndns->claim_class = NVDIMM_CCLASS_PFN; - else if (strcmp(buf, "dax") == 0 || strcmp(buf, "dax\n") == 0) + else if (sysfs_streq(buf, "dax")) ndns->claim_class = NVDIMM_CCLASS_DAX; - else if (strcmp(buf, "") == 0 || strcmp(buf, "\n") == 0) + else if (sysfs_streq(buf, "")) ndns->claim_class = NVDIMM_CCLASS_NONE; else return -EINVAL; @@ -2492,6 +2496,12 @@ static int init_active_labels(struct nd_region *nd_region) if (!label_ent) break; label = nd_label_active(ndd, j); + if (test_bit(NDD_NOBLK, &nvdimm->flags)) { + u32 flags = __le32_to_cpu(label->flags); + + flags &= ~NSLABEL_FLAG_LOCAL; + label->flags = __cpu_to_le32(flags); + } label_ent->label = label; mutex_lock(&nd_mapping->lock); diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c index 0a701837dfc0..11b9821eba85 100644 --- a/drivers/nvdimm/of_pmem.c +++ b/drivers/nvdimm/of_pmem.c @@ -108,7 +108,6 @@ static struct platform_driver of_pmem_region_driver = { .remove = of_pmem_region_remove, .driver = { .name = "of_pmem", - .owner = THIS_MODULE, .of_match_table = of_pmem_region_match, }, }; diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 6f22272e8d80..d271bd731af7 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -580,6 +580,11 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) } EXPORT_SYMBOL(nd_pfn_probe); +static u32 info_block_reserve(void) +{ + return ALIGN(SZ_8K, PAGE_SIZE); +} + /* * We hotplug memory at section granularity, pad the reserved area from * the previous section base to the namespace base address. @@ -593,7 +598,7 @@ static unsigned long init_altmap_base(resource_size_t base) static unsigned long init_altmap_reserve(resource_size_t base) { - unsigned long reserve = PHYS_PFN(SZ_8K); + unsigned long reserve = info_block_reserve() >> PAGE_SHIFT; unsigned long base_pfn = PHYS_PFN(base); reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn); @@ -608,6 +613,7 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) u64 offset = le64_to_cpu(pfn_sb->dataoff); u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); + u32 reserve = info_block_reserve(); struct nd_namespace_common *ndns = nd_pfn->ndns; struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); resource_size_t base = nsio->res.start + start_pad; @@ -621,7 +627,7 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) res->end -= end_trunc; if (nd_pfn->mode == PFN_MODE_RAM) { - if (offset < SZ_8K) + if (offset < reserve) return -EINVAL; nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); pgmap->altmap_valid = false; @@ -634,7 +640,7 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) le64_to_cpu(nd_pfn->pfn_sb->npfns), nd_pfn->npfns); memcpy(altmap, &__altmap, sizeof(*altmap)); - altmap->free = PHYS_PFN(offset - SZ_8K); + altmap->free = PHYS_PFN(offset - reserve); altmap->alloc = 0; pgmap->altmap_valid = true; } else @@ -678,18 +684,17 @@ static void trim_pfn_device(struct nd_pfn *nd_pfn, u32 *start_pad, u32 *end_trun if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE) == REGION_MIXED || !IS_ALIGNED(end, nd_pfn->align) - || nd_region_conflict(nd_region, start, size + adjust)) + || nd_region_conflict(nd_region, start, size)) *end_trunc = end - phys_pmem_align_down(nd_pfn, end); } static int nd_pfn_init(struct nd_pfn *nd_pfn) { - u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0; struct nd_namespace_common *ndns = nd_pfn->ndns; struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + u32 start_pad, end_trunc, reserve = info_block_reserve(); resource_size_t start, size; struct nd_region *nd_region; - u32 start_pad, end_trunc; struct nd_pfn_sb *pfn_sb; unsigned long npfns; phys_addr_t offset; @@ -734,7 +739,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) */ start = nsio->res.start + start_pad; size = resource_size(&nsio->res); - npfns = PFN_SECTION_ALIGN_UP((size - start_pad - end_trunc - SZ_8K) + npfns = PFN_SECTION_ALIGN_UP((size - start_pad - end_trunc - reserve) / PAGE_SIZE); if (nd_pfn->mode == PFN_MODE_PMEM) { /* @@ -742,11 +747,10 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) * when populating the vmemmap. This *should* be equal to * PMD_SIZE for most architectures. */ - offset = ALIGN(start + SZ_8K + 64 * npfns + dax_label_reserve, + offset = ALIGN(start + reserve + 64 * npfns, max(nd_pfn->align, PMD_SIZE)) - start; } else if (nd_pfn->mode == PFN_MODE_RAM) - offset = ALIGN(start + SZ_8K + dax_label_reserve, - nd_pfn->align) - start; + offset = ALIGN(start + reserve, nd_pfn->align) - start; else return -ENXIO; diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index e2818f94f292..3b58baa44b5c 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -1003,6 +1003,13 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, if (test_bit(NDD_UNARMED, &nvdimm->flags)) ro = 1; + + if (test_bit(NDD_NOBLK, &nvdimm->flags) + && dev_type == &nd_blk_device_type) { + dev_err(&nvdimm_bus->dev, "%s: %s mapping%d is not BLK capable\n", + caller, dev_name(&nvdimm->dev), i); + return NULL; + } } if (dev_type == &nd_blk_device_type) { diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index ad609617aeb8..43348303cb4b 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -42,6 +42,8 @@ enum { NDD_SECURITY_OVERWRITE = 3, /* tracking whether or not there is a pending device reference */ NDD_WORK_PENDING = 4, + /* ignore / filter NSLABEL_FLAG_LOCAL for this DIMM, i.e. no aliasing */ + NDD_NOBLK = 5, /* need to set a limit somewhere, but yes, this is likely overkill */ ND_IOCTL_MAX_BUFLEN = SZ_4M, diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index f57c9e434d2d..de5d90212409 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -243,6 +243,7 @@ struct nd_cmd_pkg { #define NVDIMM_FAMILY_HPE1 1 #define NVDIMM_FAMILY_HPE2 2 #define NVDIMM_FAMILY_MSFT 3 +#define NVDIMM_FAMILY_HYPERV 4 #define ND_IOCTL_CALL _IOWR(ND_IOCTL, ND_CMD_CALL,\ struct nd_cmd_pkg)