forked from Minki/linux
8fc5c73554
Persistent memory, as described by the ACPI NFIT (NVDIMM Firmware Interface Table), is the first known instance of a memory range described by a unique "target" proximity domain. The distinction between "initiator" and "target" proximity domains is an approach that the ACPI HMAT (Heterogeneous Memory Attributes Table) uses to describe the unique performance properties of a memory range relative to a given initiator (e.g. CPU or DMA device). Currently the numa-node for a /dev/pmemX block-device or /dev/daxX.Y char-device follows the traditional notion of 'numa-node' where the attribute conveys the closest online numa-node. That numa-node attribute is useful for cpu-binding and memory-binding processes *near* the device. However, when the memory range backing a 'pmem' or 'dax' device is onlined (memory hot-add), the memory-only-numa-node representing that address needs to be differentiated from the set of online nodes. In other words, the numa-node association of the device depends on whether you can bind processes *near* the cpu-numa-node in the offline device-case, or bind processes *on* the memory-range directly after the backing address range is onlined. Allow for the case that platform firmware describes persistent memory with a unique proximity domain, i.e. when it is distinct from the proximity of DRAM and CPUs that are on the same socket. Plumb the Linux numa-node translation of that proximity through the libnvdimm region device to namespaces that are in device-dax mode. With this in place the proposed kmem driver [1] can optionally discover a unique numa-node number for the address range as it transitions the memory from an offline state managed by a device-driver to an online memory range managed by the core-mm.
[1]: https://lore.kernel.org/lkml/20181022201317.8558C1D8@viggo.jf.intel.com Reported-by: Fan Du <fan.du@intel.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: "Oliver O'Halloran" <oohall@gmail.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Jérôme Glisse <jglisse@redhat.com> Reviewed-by: Yang Shi <yang.shi@linux.alibaba.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
96 lines
2.3 KiB
C
96 lines
2.3 KiB
C
/*
|
|
* Copyright (c) 2015, Christoph Hellwig.
|
|
* Copyright (c) 2015, Intel Corporation.
|
|
*/
|
|
#include <linux/platform_device.h>
|
|
#include <linux/memory_hotplug.h>
|
|
#include <linux/libnvdimm.h>
|
|
#include <linux/module.h>
|
|
|
|
static const struct attribute_group *e820_pmem_attribute_groups[] = {
|
|
&nvdimm_bus_attribute_group,
|
|
NULL,
|
|
};
|
|
|
|
static const struct attribute_group *e820_pmem_region_attribute_groups[] = {
|
|
&nd_region_attribute_group,
|
|
&nd_device_attribute_group,
|
|
NULL,
|
|
};
|
|
|
|
/*
 * Platform driver .remove callback.
 *
 * Unregisters the nvdimm bus that e820_pmem_probe() stored as the
 * platform device's driver data. Always returns 0.
 */
static int e820_pmem_remove(struct platform_device *pdev)
{
	nvdimm_bus_unregister(platform_get_drvdata(pdev));

	return 0;
}
|
|
|
|
#ifdef CONFIG_MEMORY_HOTPLUG
|
|
static int e820_range_to_nid(resource_size_t addr)
|
|
{
|
|
return memory_add_physaddr_to_nid(addr);
|
|
}
|
|
#else
|
|
static int e820_range_to_nid(resource_size_t addr)
|
|
{
|
|
return NUMA_NO_NODE;
|
|
}
|
|
#endif
|
|
|
|
static int e820_register_one(struct resource *res, void *data)
|
|
{
|
|
struct nd_region_desc ndr_desc;
|
|
struct nvdimm_bus *nvdimm_bus = data;
|
|
|
|
memset(&ndr_desc, 0, sizeof(ndr_desc));
|
|
ndr_desc.res = res;
|
|
ndr_desc.attr_groups = e820_pmem_region_attribute_groups;
|
|
ndr_desc.numa_node = e820_range_to_nid(res->start);
|
|
ndr_desc.target_node = ndr_desc.numa_node;
|
|
set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
|
|
if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc))
|
|
return -ENXIO;
|
|
return 0;
|
|
}
|
|
|
|
static int e820_pmem_probe(struct platform_device *pdev)
|
|
{
|
|
static struct nvdimm_bus_descriptor nd_desc;
|
|
struct device *dev = &pdev->dev;
|
|
struct nvdimm_bus *nvdimm_bus;
|
|
int rc = -ENXIO;
|
|
|
|
nd_desc.attr_groups = e820_pmem_attribute_groups;
|
|
nd_desc.provider_name = "e820";
|
|
nd_desc.module = THIS_MODULE;
|
|
nvdimm_bus = nvdimm_bus_register(dev, &nd_desc);
|
|
if (!nvdimm_bus)
|
|
goto err;
|
|
platform_set_drvdata(pdev, nvdimm_bus);
|
|
|
|
rc = walk_iomem_res_desc(IORES_DESC_PERSISTENT_MEMORY_LEGACY,
|
|
IORESOURCE_MEM, 0, -1, nvdimm_bus, e820_register_one);
|
|
if (rc)
|
|
goto err;
|
|
return 0;
|
|
err:
|
|
nvdimm_bus_unregister(nvdimm_bus);
|
|
dev_err(dev, "failed to register legacy persistent memory ranges\n");
|
|
return rc;
|
|
}
|
|
|
|
static struct platform_driver e820_pmem_driver = {
|
|
.probe = e820_pmem_probe,
|
|
.remove = e820_pmem_remove,
|
|
.driver = {
|
|
.name = "e820_pmem",
|
|
},
|
|
};
|
|
|
|
module_platform_driver(e820_pmem_driver);
|
|
|
|
MODULE_ALIAS("platform:e820_pmem*");
|
|
MODULE_LICENSE("GPL v2");
|
|
MODULE_AUTHOR("Intel Corporation");
|