8fc5c73554
Persistent memory, as described by the ACPI NFIT (NVDIMM Firmware Interface Table), is the first known instance of a memory range described by a unique "target" proximity domain. Where "initiator" and "target" proximity domains is an approach that the ACPI HMAT (Heterogeneous Memory Attributes Table) uses to described the unique performance properties of a memory range relative to a given initiator (e.g. CPU or DMA device). Currently the numa-node for a /dev/pmemX block-device or /dev/daxX.Y char-device follows the traditional notion of 'numa-node' where the attribute conveys the closest online numa-node. That numa-node attribute is useful for cpu-binding and memory-binding processes *near* the device. However, when the memory range backing a 'pmem', or 'dax' device is onlined (memory hot-add) the memory-only-numa-node representing that address needs to be differentiated from the set of online nodes. In other words, the numa-node association of the device depends on whether you can bind processes *near* the cpu-numa-node in the offline device-case, or bind process *on* the memory-range directly after the backing address range is onlined. Allow for the case that platform firmware describes persistent memory with a unique proximity domain, i.e. when it is distinct from the proximity of DRAM and CPUs that are on the same socket. Plumb the Linux numa-node translation of that proximity through the libnvdimm region device to namespaces that are in device-dax mode. With this in place the proposed kmem driver [1] can optionally discover a unique numa-node number for the address range as it transitions the memory from an offline state managed by a device-driver to an online memory range managed by the core-mm. [1]: https://lore.kernel.org/lkml/20181022201317.8558C1D8@viggo.jf.intel.com Reported-by: Fan Du <fan.du@intel.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: "Oliver O'Halloran" <oohall@gmail.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Jérôme Glisse <jglisse@redhat.com> Reviewed-by: Yang Shi <yang.shi@linux.alibaba.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
72 lines
2.2 KiB
C
72 lines
2.2 KiB
C
/*
|
|
* Copyright(c) 2016 Intel Corporation. All rights reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of version 2 of the GNU General Public License as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful, but
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*/
|
|
#ifndef __DAX_PRIVATE_H__
|
|
#define __DAX_PRIVATE_H__
|
|
|
|
#include <linux/device.h>
|
|
#include <linux/cdev.h>
|
|
|
|
/* private routines between core files */
|
|
struct dax_device;
|
|
struct dax_device *inode_dax(struct inode *inode);
|
|
struct inode *dax_inode(struct dax_device *dax_dev);
|
|
int dax_bus_init(void);
|
|
void dax_bus_exit(void);
|
|
|
|
/**
|
|
* struct dax_region - mapping infrastructure for dax devices
|
|
* @id: kernel-wide unique region for a memory range
|
|
* @target_node: effective numa node if this memory range is onlined
|
|
* @kref: to pin while other agents have a need to do lookups
|
|
* @dev: parent device backing this region
|
|
* @align: allocation and mapping alignment for child dax devices
|
|
* @res: physical address range of the region
|
|
* @pfn_flags: identify whether the pfns are paged back or not
|
|
*/
|
|
struct dax_region {
|
|
int id;
|
|
int target_node;
|
|
struct kref kref;
|
|
struct device *dev;
|
|
unsigned int align;
|
|
struct resource res;
|
|
unsigned long pfn_flags;
|
|
};
|
|
|
|
/**
|
|
* struct dev_dax - instance data for a subdivision of a dax region, and
|
|
* data while the device is activated in the driver.
|
|
* @region - parent region
|
|
* @dax_dev - core dax functionality
|
|
* @target_node: effective numa node if dev_dax memory range is onlined
|
|
* @dev - device core
|
|
* @pgmap - pgmap for memmap setup / lifetime (driver owned)
|
|
* @ref: pgmap reference count (driver owned)
|
|
* @cmp: @ref final put completion (driver owned)
|
|
*/
|
|
struct dev_dax {
|
|
struct dax_region *region;
|
|
struct dax_device *dax_dev;
|
|
int target_node;
|
|
struct device dev;
|
|
struct dev_pagemap pgmap;
|
|
struct percpu_ref ref;
|
|
struct completion cmp;
|
|
};
|
|
|
|
static inline struct dev_dax *to_dev_dax(struct device *dev)
|
|
{
|
|
return container_of(dev, struct dev_dax, dev);
|
|
}
|
|
#endif
|