dax + libnvdimm for v5.17

 - Simplify the dax_operations API
   - Eliminate bdev_dax_pgoff() in favor of the filesystem maintaining
     and applying a partition offset to all its DAX iomap operations.
   - Remove wrappers and device-mapper stacked callbacks for
     ->copy_from_iter() and ->copy_to_iter() in favor of moving
     block_device relative offset responsibility to the
     dax_direct_access() caller.
   - Remove the need for an @bdev in filesystem-DAX infrastructure
   - Remove unused uio helpers copy_from_iter_flushcache() and
     copy_mc_to_iter() as only the non-check_copy_size() versions are
     used for DAX.
 - Prepare XFS for the pending (next merge window) DAX+reflink support
 - Remove deprecated DEV_DAX_PMEM_COMPAT support
 - Clean up a straggling misuse of the GUID API
 
 Tags offered after the branch was cut:
 Reviewed-by: Mike Snitzer <snitzer@redhat.com>
 Link: https://lore.kernel.org/r/Ydb/3P+8nvjCjYfO@redhat.com
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYIAB0WIQSbo+XnGs+rwLz9XGXfioYZHlFsZwUCYd3dTAAKCRDfioYZHlFs
 Z//UAP9zetoTE+O7zJG7CXja4jSopSadbdbh6QKSXaqfKBPvQQD+N4US3wA2bGv8
 f/qCY62j2Hj3hUTGHs9RvTyw3JsSYAA=
 =QvDs
 -----END PGP SIGNATURE-----

Merge tag 'libnvdimm-for-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull dax and libnvdimm updates from Dan Williams:
 "The bulk of this is a rework of the dax_operations API after
  discovering the obstacles it posed to the work-in-progress DAX+reflink
  support for XFS and other copy-on-write filesystem mechanics.

  The primary sticking point was the need to plumb a block_device through
  the API to handle partition offsets; Christoph untangled that
  dependency, in addition to other cleanups, to make landing the
  DAX+reflink support easier.

  The DAX_PMEM_COMPAT option has been around for 4 years and not only
  are distributions shipping userspace that understands the current
  configuration API, but some are not even bothering to turn this option
  on anymore, so it seems a good time to remove it per the deprecation
  schedule. Recall that this was added after the device-dax subsystem
  moved from /sys/class/dax to /sys/bus/dax for its sysfs organization.
  All recent functionality depends on /sys/bus/dax.

  Some other miscellaneous cleanups and reflink prep patches are
  included as well.

  Summary:

   - Simplify the dax_operations API:

      - Eliminate bdev_dax_pgoff() in favor of the filesystem
        maintaining and applying a partition offset to all its DAX iomap
        operations.

      - Remove wrappers and device-mapper stacked callbacks for
        ->copy_from_iter() and ->copy_to_iter() in favor of moving
        block_device relative offset responsibility to the
        dax_direct_access() caller.

      - Remove the need for an @bdev in filesystem-DAX infrastructure

      - Remove unused uio helpers copy_from_iter_flushcache() and
        copy_mc_to_iter() as only the non-check_copy_size() versions are
        used for DAX.

   - Prepare XFS for the pending (next merge window) DAX+reflink support

   - Remove deprecated DEV_DAX_PMEM_COMPAT support

   - Clean up a straggling misuse of the GUID API"
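
As a rough illustration of the reworked registration flow (a sketch distilled
from the driver diffs below; the "example_*" names are placeholders and error
paths are abbreviated), a block driver now allocates its dax_device without a
host name or DAXDEV_F_SYNC flags, declares its copy semantics with helpers
instead of supplying copy callbacks, and binds the device to its gendisk so
filesystems can find it:

#include <linux/dax.h>

struct example_dev;	/* hypothetical driver state */

static long example_dax_direct_access(struct dax_device *dax_dev,
		pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn);
static int example_dax_zero_page_range(struct dax_device *dax_dev,
		pgoff_t pgoff, size_t nr_pages);

/* ->copy_from_iter and ->copy_to_iter are gone from dax_operations */
static const struct dax_operations example_dax_ops = {
	.direct_access   = example_dax_direct_access,
	.zero_page_range = example_dax_zero_page_range,
};

static int example_attach(struct example_dev *edev, struct gendisk *disk)
{
	struct dax_device *dax_dev;
	int rc;

	dax_dev = alloc_dax(edev, &example_dax_ops);
	if (IS_ERR(dax_dev))
		return PTR_ERR(dax_dev);

	set_dax_nocache(dax_dev);	/* writes use _copy_from_iter_flushcache() */
	set_dax_nomc(dax_dev);		/* reads use _copy_mc_to_iter() */
	set_dax_synchronous(dax_dev);	/* only if the device is synchronous */

	/* register for fs_dax_get_by_bdev() lookups by gendisk */
	rc = dax_add_host(dax_dev, disk);
	if (rc) {
		kill_dax(dax_dev);
		put_dax(dax_dev);
	}
	return rc;
}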

* tag 'libnvdimm-for-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (38 commits)
  iomap: Fix error handling in iomap_zero_iter()
  ACPI: NFIT: Import GUID before use
  dax: remove the copy_from_iter and copy_to_iter methods
  dax: remove the DAXDEV_F_SYNC flag
  dax: simplify dax_synchronous and set_dax_synchronous
  uio: remove copy_from_iter_flushcache() and copy_mc_to_iter()
  iomap: turn the byte variable in iomap_zero_iter into a ssize_t
  memremap: remove support for external pgmap refcounts
  fsdax: don't require CONFIG_BLOCK
  iomap: build the block based code conditionally
  dax: fix up some of the block device related ifdefs
  fsdax: shift partition offset handling into the file systems
  dax: return the partition offset from fs_dax_get_by_bdev
  iomap: add a IOMAP_DAX flag
  xfs: pass the mapping flags to xfs_bmbt_to_iomap
  xfs: use xfs_direct_write_iomap_ops for DAX zeroing
  xfs: move dax device handling into xfs_{alloc,free}_buftarg
  ext4: cleanup the dax handling in ext4_fill_super
  ext2: cleanup the dax handling in ext2_fill_super
  fsdax: decouple zeroing from the iomap buffered I/O code
  ...
commit 3acbdbf42e by Linus Torvalds, 2022-01-12 15:46:11 -08:00
63 changed files with 567 additions and 1186 deletions

@ -1,22 +0,0 @@
What: /sys/class/dax/
Date: May, 2016
KernelVersion: v4.7
Contact: nvdimm@lists.linux.dev
Description: Device DAX is the device-centric analogue of Filesystem
DAX (CONFIG_FS_DAX). It allows memory ranges to be
allocated and mapped without need of an intervening file
system. Device DAX is strict, precise and predictable.
Specifically this interface:
1. Guarantees fault granularity with respect to a given
page size (pte, pmd, or pud) set at configuration time.
2. Enforces deterministic behavior by being strict about
what fault scenarios are supported.
The /sys/class/dax/ interface enumerates all the
device-dax instances in the system. The ABI is
deprecated and will be removed after 2020. It is
replaced with the DAX bus interface /sys/bus/dax/ where
device-dax instances can be found under
/sys/bus/dax/devices/


@ -678,10 +678,12 @@ static const char *spa_type_name(u16 type)
int nfit_spa_type(struct acpi_nfit_system_address *spa)
{
guid_t guid;
int i;
import_guid(&guid, spa->range_guid);
for (i = 0; i < NFIT_UUID_MAX; i++)
if (guid_equal(to_nfit_uuid(i), (guid_t *)&spa->range_guid))
if (guid_equal(to_nfit_uuid(i), &guid))
return i;
return -1;
}
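
For reference, the GUID API misuse fixed here was casting the raw byte array
spa->range_guid straight to guid_t *; import_guid() copies the bytes into a
properly typed guid_t before comparison. A minimal sketch of the corrected
pattern (the helper name is illustrative):

#include <linux/uuid.h>

static bool example_guid_matches(const guid_t *known, const u8 raw[16])
{
	guid_t guid;

	import_guid(&guid, raw);	/* copy raw bytes into a typed guid_t */
	return guid_equal(known, &guid);
}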


@ -1,8 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
config DAX_DRIVER
select DAX
bool
menuconfig DAX
tristate "DAX: direct access to differentiated memory"
select SRCU
@ -70,13 +66,4 @@ config DEV_DAX_KMEM
Say N if unsure.
config DEV_DAX_PMEM_COMPAT
tristate "PMEM DAX: support the deprecated /sys/class/dax interface"
depends on m && DEV_DAX_PMEM=m
default DEV_DAX_PMEM
help
Older versions of the libdaxctl library expect to find all
device-dax instances under /sys/class/dax. If libdaxctl in
your distribution is older than v58 say M, otherwise say N.
endif


@ -2,10 +2,11 @@
obj-$(CONFIG_DAX) += dax.o
obj-$(CONFIG_DEV_DAX) += device_dax.o
obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o
obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
dax-y := super.o
dax-y += bus.o
device_dax-y := device.o
dax_pmem-y := pmem.o
obj-y += pmem/
obj-y += hmem/


@ -10,8 +10,6 @@
#include "dax-private.h"
#include "bus.h"
static struct class *dax_class;
static DEFINE_MUTEX(dax_bus_lock);
#define DAX_NAME_LEN 30
@ -1323,14 +1321,17 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
}
/*
* No 'host' or dax_operations since there is no access to this
* device outside of mmap of the resulting character device.
* No dax_operations since there is no access to this device outside of
* mmap of the resulting character device.
*/
dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC);
dax_dev = alloc_dax(dev_dax, NULL);
if (IS_ERR(dax_dev)) {
rc = PTR_ERR(dax_dev);
goto err_alloc_dax;
}
set_dax_synchronous(dax_dev);
set_dax_nocache(dax_dev);
set_dax_nomc(dax_dev);
/* a device_dax instance is dead while the driver is not attached */
kill_dax(dax_dev);
@ -1343,10 +1344,7 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
inode = dax_inode(dax_dev);
dev->devt = inode->i_rdev;
if (data->subsys == DEV_DAX_BUS)
dev->bus = &dax_bus_type;
else
dev->class = dax_class;
dev->bus = &dax_bus_type;
dev->parent = parent;
dev->type = &dev_dax_type;
@ -1445,22 +1443,10 @@ EXPORT_SYMBOL_GPL(dax_driver_unregister);
int __init dax_bus_init(void)
{
int rc;
if (IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)) {
dax_class = class_create(THIS_MODULE, "dax");
if (IS_ERR(dax_class))
return PTR_ERR(dax_class);
}
rc = bus_register(&dax_bus_type);
if (rc)
class_destroy(dax_class);
return rc;
return bus_register(&dax_bus_type);
}
void __exit dax_bus_exit(void)
{
bus_unregister(&dax_bus_type);
class_destroy(dax_class);
}


@ -16,24 +16,15 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id,
struct range *range, int target_node, unsigned int align,
unsigned long flags);
enum dev_dax_subsys {
DEV_DAX_BUS = 0, /* zeroed dev_dax_data picks this by default */
DEV_DAX_CLASS,
};
struct dev_dax_data {
struct dax_region *dax_region;
struct dev_pagemap *pgmap;
enum dev_dax_subsys subsys;
resource_size_t size;
int id;
};
struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data);
/* to be deleted when DEV_DAX_CLASS is removed */
struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys);
struct dax_device_driver {
struct device_driver drv;
struct list_head ids;
@ -49,10 +40,6 @@ int __dax_driver_register(struct dax_device_driver *dax_drv,
void dax_driver_unregister(struct dax_device_driver *dax_drv);
void kill_dev_dax(struct dev_dax *dev_dax);
#if IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)
int dev_dax_probe(struct dev_dax *dev_dax);
#endif
/*
* While run_dax() is potentially a generic operation that could be
* defined in include/linux/dax.h we don't want to grow any users


@ -433,11 +433,7 @@ int dev_dax_probe(struct dev_dax *dev_dax)
inode = dax_inode(dax_dev);
cdev = inode->i_cdev;
cdev_init(cdev, &dax_fops);
if (dev->class) {
/* for the CONFIG_DEV_DAX_PMEM_COMPAT case */
cdev->owner = dev->parent->driver->owner;
} else
cdev->owner = dev->driver->owner;
cdev->owner = dev->driver->owner;
cdev_set_parent(cdev, &dev->kobj);
rc = cdev_add(cdev, dev->devt, 1);
if (rc)


@ -3,11 +3,11 @@
#include <linux/memremap.h>
#include <linux/module.h>
#include <linux/pfn_t.h>
#include "../../nvdimm/pfn.h"
#include "../../nvdimm/nd.h"
#include "../bus.h"
#include "../nvdimm/pfn.h"
#include "../nvdimm/nd.h"
#include "bus.h"
struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys)
static struct dev_dax *__dax_pmem_probe(struct device *dev)
{
struct range range;
int rc, id, region_id;
@ -63,7 +63,6 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys)
.dax_region = dax_region,
.id = id,
.pgmap = &pgmap,
.subsys = subsys,
.size = range_len(&range),
};
dev_dax = devm_create_dev_dax(&data);
@ -73,7 +72,32 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys)
return dev_dax;
}
EXPORT_SYMBOL_GPL(__dax_pmem_probe);
static int dax_pmem_probe(struct device *dev)
{
return PTR_ERR_OR_ZERO(__dax_pmem_probe(dev));
}
static struct nd_device_driver dax_pmem_driver = {
.probe = dax_pmem_probe,
.drv = {
.name = "dax_pmem",
},
.type = ND_DRIVER_DAX_PMEM,
};
static int __init dax_pmem_init(void)
{
return nd_driver_register(&dax_pmem_driver);
}
module_init(dax_pmem_init);
static void __exit dax_pmem_exit(void)
{
driver_unregister(&dax_pmem_driver.drv);
}
module_exit(dax_pmem_exit);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM);


@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem_core.o
obj-$(CONFIG_DEV_DAX_PMEM_COMPAT) += dax_pmem_compat.o
dax_pmem-y := pmem.o
dax_pmem_core-y := core.o


@ -1,72 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2016 - 2018 Intel Corporation. All rights reserved. */
#include <linux/percpu-refcount.h>
#include <linux/memremap.h>
#include <linux/module.h>
#include <linux/pfn_t.h>
#include <linux/nd.h>
#include "../bus.h"
/* we need the private definitions to implement compat support */
#include "../dax-private.h"
static int dax_pmem_compat_probe(struct device *dev)
{
struct dev_dax *dev_dax = __dax_pmem_probe(dev, DEV_DAX_CLASS);
int rc;
if (IS_ERR(dev_dax))
return PTR_ERR(dev_dax);
if (!devres_open_group(&dev_dax->dev, dev_dax, GFP_KERNEL))
return -ENOMEM;
device_lock(&dev_dax->dev);
rc = dev_dax_probe(dev_dax);
device_unlock(&dev_dax->dev);
devres_close_group(&dev_dax->dev, dev_dax);
if (rc)
devres_release_group(&dev_dax->dev, dev_dax);
return rc;
}
static int dax_pmem_compat_release(struct device *dev, void *data)
{
device_lock(dev);
devres_release_group(dev, to_dev_dax(dev));
device_unlock(dev);
return 0;
}
static void dax_pmem_compat_remove(struct device *dev)
{
device_for_each_child(dev, NULL, dax_pmem_compat_release);
}
static struct nd_device_driver dax_pmem_compat_driver = {
.probe = dax_pmem_compat_probe,
.remove = dax_pmem_compat_remove,
.drv = {
.name = "dax_pmem_compat",
},
.type = ND_DRIVER_DAX_PMEM,
};
static int __init dax_pmem_compat_init(void)
{
return nd_driver_register(&dax_pmem_compat_driver);
}
module_init(dax_pmem_compat_init);
static void __exit dax_pmem_compat_exit(void)
{
driver_unregister(&dax_pmem_compat_driver.drv);
}
module_exit(dax_pmem_compat_exit);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM);


@ -7,34 +7,4 @@
#include <linux/nd.h>
#include "../bus.h"
static int dax_pmem_probe(struct device *dev)
{
return PTR_ERR_OR_ZERO(__dax_pmem_probe(dev, DEV_DAX_BUS));
}
static struct nd_device_driver dax_pmem_driver = {
.probe = dax_pmem_probe,
.drv = {
.name = "dax_pmem",
},
.type = ND_DRIVER_DAX_PMEM,
};
static int __init dax_pmem_init(void)
{
return nd_driver_register(&dax_pmem_driver);
}
module_init(dax_pmem_init);
static void __exit dax_pmem_exit(void)
{
driver_unregister(&dax_pmem_driver.drv);
}
module_exit(dax_pmem_exit);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
#if !IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)
/* For compat builds, don't load this module by default */
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM);
#endif


@ -7,10 +7,8 @@
#include <linux/mount.h>
#include <linux/pseudo_fs.h>
#include <linux/magic.h>
#include <linux/genhd.h>
#include <linux/pfn_t.h>
#include <linux/cdev.h>
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/uio.h>
#include <linux/dax.h>
@ -21,15 +19,12 @@
* struct dax_device - anchor object for dax services
* @inode: core vfs
* @cdev: optional character interface for "device dax"
* @host: optional name for lookups where the device path is not available
* @private: dax driver private data
* @flags: state and boolean properties
*/
struct dax_device {
struct hlist_node list;
struct inode inode;
struct cdev cdev;
const char *host;
void *private;
unsigned long flags;
const struct dax_operations *ops;
@ -42,10 +37,6 @@ static DEFINE_IDA(dax_minor_ida);
static struct kmem_cache *dax_cache __read_mostly;
static struct super_block *dax_superblock __read_mostly;
#define DAX_HASH_SIZE (PAGE_SIZE / sizeof(struct hlist_head))
static struct hlist_head dax_host_list[DAX_HASH_SIZE];
static DEFINE_SPINLOCK(dax_host_lock);
int dax_read_lock(void)
{
return srcu_read_lock(&dax_srcu);
@ -58,169 +49,54 @@ void dax_read_unlock(int id)
}
EXPORT_SYMBOL_GPL(dax_read_unlock);
static int dax_host_hash(const char *host)
{
return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE;
}
#ifdef CONFIG_BLOCK
#if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX)
#include <linux/blkdev.h>
int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
pgoff_t *pgoff)
static DEFINE_XARRAY(dax_hosts);
int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk)
{
sector_t start_sect = bdev ? get_start_sect(bdev) : 0;
phys_addr_t phys_off = (start_sect + sector) * 512;
if (pgoff)
*pgoff = PHYS_PFN(phys_off);
if (phys_off % PAGE_SIZE || size % PAGE_SIZE)
return -EINVAL;
return 0;
return xa_insert(&dax_hosts, (unsigned long)disk, dax_dev, GFP_KERNEL);
}
EXPORT_SYMBOL(bdev_dax_pgoff);
EXPORT_SYMBOL_GPL(dax_add_host);
void dax_remove_host(struct gendisk *disk)
{
xa_erase(&dax_hosts, (unsigned long)disk);
}
EXPORT_SYMBOL_GPL(dax_remove_host);
#if IS_ENABLED(CONFIG_FS_DAX)
/**
* dax_get_by_host() - temporary lookup mechanism for filesystem-dax
* @host: alternate name for the device registered by a dax driver
* fs_dax_get_by_bdev() - temporary lookup mechanism for filesystem-dax
* @bdev: block device to find a dax_device for
* @start_off: returns the byte offset into the dax_device that @bdev starts
*/
static struct dax_device *dax_get_by_host(const char *host)
struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, u64 *start_off)
{
struct dax_device *dax_dev, *found = NULL;
int hash, id;
if (!host)
return NULL;
hash = dax_host_hash(host);
id = dax_read_lock();
spin_lock(&dax_host_lock);
hlist_for_each_entry(dax_dev, &dax_host_list[hash], list) {
if (!dax_alive(dax_dev)
|| strcmp(host, dax_dev->host) != 0)
continue;
if (igrab(&dax_dev->inode))
found = dax_dev;
break;
}
spin_unlock(&dax_host_lock);
dax_read_unlock(id);
return found;
}
struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
{
if (!blk_queue_dax(bdev->bd_disk->queue))
return NULL;
return dax_get_by_host(bdev->bd_disk->disk_name);
}
EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
bool generic_fsdax_supported(struct dax_device *dax_dev,
struct block_device *bdev, int blocksize, sector_t start,
sector_t sectors)
{
bool dax_enabled = false;
pgoff_t pgoff, pgoff_end;
void *kaddr, *end_kaddr;
pfn_t pfn, end_pfn;
sector_t last_page;
long len, len2;
int err, id;
if (blocksize != PAGE_SIZE) {
pr_info("%pg: error: unsupported blocksize for dax\n", bdev);
return false;
}
if (!dax_dev) {
pr_debug("%pg: error: dax unsupported by block device\n", bdev);
return false;
}
err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, &pgoff);
if (err) {
pr_info("%pg: error: unaligned partition for dax\n", bdev);
return false;
}
last_page = PFN_DOWN((start + sectors - 1) * 512) * PAGE_SIZE / 512;
err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end);
if (err) {
pr_info("%pg: error: unaligned partition for dax\n", bdev);
return false;
}
id = dax_read_lock();
len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn);
if (len < 1 || len2 < 1) {
pr_info("%pg: error: dax access failed (%ld)\n",
bdev, len < 1 ? len : len2);
dax_read_unlock(id);
return false;
}
if (IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn)) {
/*
* An arch that has enabled the pmem api should also
* have its drivers support pfn_t_devmap()
*
* This is a developer warning and should not trigger in
* production. dax_flush() will crash since it depends
* on being able to do (page_address(pfn_to_page())).
*/
WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API));
dax_enabled = true;
} else if (pfn_t_devmap(pfn) && pfn_t_devmap(end_pfn)) {
struct dev_pagemap *pgmap, *end_pgmap;
pgmap = get_dev_pagemap(pfn_t_to_pfn(pfn), NULL);
end_pgmap = get_dev_pagemap(pfn_t_to_pfn(end_pfn), NULL);
if (pgmap && pgmap == end_pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX
&& pfn_t_to_page(pfn)->pgmap == pgmap
&& pfn_t_to_page(end_pfn)->pgmap == pgmap
&& pfn_t_to_pfn(pfn) == PHYS_PFN(__pa(kaddr))
&& pfn_t_to_pfn(end_pfn) == PHYS_PFN(__pa(end_kaddr)))
dax_enabled = true;
put_dev_pagemap(pgmap);
put_dev_pagemap(end_pgmap);
}
dax_read_unlock(id);
if (!dax_enabled) {
pr_info("%pg: error: dax support not enabled\n", bdev);
return false;
}
return true;
}
EXPORT_SYMBOL_GPL(generic_fsdax_supported);
bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
int blocksize, sector_t start, sector_t len)
{
bool ret = false;
struct dax_device *dax_dev;
u64 part_size;
int id;
if (!dax_dev)
return false;
if (!blk_queue_dax(bdev->bd_disk->queue))
return NULL;
*start_off = get_start_sect(bdev) * SECTOR_SIZE;
part_size = bdev_nr_sectors(bdev) * SECTOR_SIZE;
if (*start_off % PAGE_SIZE || part_size % PAGE_SIZE) {
pr_info("%pg: error: unaligned partition for dax\n", bdev);
return NULL;
}
id = dax_read_lock();
if (dax_alive(dax_dev) && dax_dev->ops->dax_supported)
ret = dax_dev->ops->dax_supported(dax_dev, bdev, blocksize,
start, len);
dax_dev = xa_load(&dax_hosts, (unsigned long)bdev->bd_disk);
if (!dax_dev || !dax_alive(dax_dev) || !igrab(&dax_dev->inode))
dax_dev = NULL;
dax_read_unlock(id);
return ret;
return dax_dev;
}
EXPORT_SYMBOL_GPL(dax_supported);
#endif /* CONFIG_FS_DAX */
#endif /* CONFIG_BLOCK */
EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
#endif /* CONFIG_BLOCK && CONFIG_FS_DAX */
enum dax_device_flags {
/* !alive + rcu grace period == no new operations / mappings */
@ -229,6 +105,10 @@ enum dax_device_flags {
DAXDEV_WRITE_CACHE,
/* flag to check if device supports synchronous flush */
DAXDEV_SYNC,
/* do not leave the caches dirty after writes */
DAXDEV_NOCACHE,
/* handle CPU fetch exceptions during reads */
DAXDEV_NOMC,
};
/**
@ -270,9 +150,15 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
if (!dax_alive(dax_dev))
return 0;
return dax_dev->ops->copy_from_iter(dax_dev, pgoff, addr, bytes, i);
/*
* The userspace address for the memory copy has already been validated
via access_ok() in vfs_write(), so use the 'no check' version to bypass
* the HARDENED_USERCOPY overhead.
*/
if (test_bit(DAXDEV_NOCACHE, &dax_dev->flags))
return _copy_from_iter_flushcache(addr, bytes, i);
return _copy_from_iter(addr, bytes, i);
}
EXPORT_SYMBOL_GPL(dax_copy_from_iter);
size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t bytes, struct iov_iter *i)
@ -280,9 +166,15 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
if (!dax_alive(dax_dev))
return 0;
return dax_dev->ops->copy_to_iter(dax_dev, pgoff, addr, bytes, i);
/*
* The userspace address for the memory copy has already been validated
* via access_ok() in vfs_red, so use the 'no check' version to bypass
* the HARDENED_USERCOPY overhead.
*/
if (test_bit(DAXDEV_NOMC, &dax_dev->flags))
return _copy_mc_to_iter(addr, bytes, i);
return _copy_to_iter(addr, bytes, i);
}
EXPORT_SYMBOL_GPL(dax_copy_to_iter);
int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
size_t nr_pages)
@ -332,17 +224,29 @@ bool dax_write_cache_enabled(struct dax_device *dax_dev)
}
EXPORT_SYMBOL_GPL(dax_write_cache_enabled);
bool __dax_synchronous(struct dax_device *dax_dev)
bool dax_synchronous(struct dax_device *dax_dev)
{
return test_bit(DAXDEV_SYNC, &dax_dev->flags);
}
EXPORT_SYMBOL_GPL(__dax_synchronous);
EXPORT_SYMBOL_GPL(dax_synchronous);
void __set_dax_synchronous(struct dax_device *dax_dev)
void set_dax_synchronous(struct dax_device *dax_dev)
{
set_bit(DAXDEV_SYNC, &dax_dev->flags);
}
EXPORT_SYMBOL_GPL(__set_dax_synchronous);
EXPORT_SYMBOL_GPL(set_dax_synchronous);
void set_dax_nocache(struct dax_device *dax_dev)
{
set_bit(DAXDEV_NOCACHE, &dax_dev->flags);
}
EXPORT_SYMBOL_GPL(set_dax_nocache);
void set_dax_nomc(struct dax_device *dax_dev)
{
set_bit(DAXDEV_NOMC, &dax_dev->flags);
}
EXPORT_SYMBOL_GPL(set_dax_nomc);
bool dax_alive(struct dax_device *dax_dev)
{
@ -363,12 +267,7 @@ void kill_dax(struct dax_device *dax_dev)
return;
clear_bit(DAXDEV_ALIVE, &dax_dev->flags);
synchronize_srcu(&dax_srcu);
spin_lock(&dax_host_lock);
hlist_del_init(&dax_dev->list);
spin_unlock(&dax_host_lock);
}
EXPORT_SYMBOL_GPL(kill_dax);
@ -400,8 +299,6 @@ static struct dax_device *to_dax_dev(struct inode *inode)
static void dax_free_inode(struct inode *inode)
{
struct dax_device *dax_dev = to_dax_dev(inode);
kfree(dax_dev->host);
dax_dev->host = NULL;
if (inode->i_rdev)
ida_simple_remove(&dax_minor_ida, iminor(inode));
kmem_cache_free(dax_cache, dax_dev);
@ -476,65 +373,30 @@ static struct dax_device *dax_dev_get(dev_t devt)
return dax_dev;
}
static void dax_add_host(struct dax_device *dax_dev, const char *host)
{
int hash;
/*
* Unconditionally init dax_dev since it's coming from a
* non-zeroed slab cache
*/
INIT_HLIST_NODE(&dax_dev->list);
dax_dev->host = host;
if (!host)
return;
hash = dax_host_hash(host);
spin_lock(&dax_host_lock);
hlist_add_head(&dax_dev->list, &dax_host_list[hash]);
spin_unlock(&dax_host_lock);
}
struct dax_device *alloc_dax(void *private, const char *__host,
const struct dax_operations *ops, unsigned long flags)
struct dax_device *alloc_dax(void *private, const struct dax_operations *ops)
{
struct dax_device *dax_dev;
const char *host;
dev_t devt;
int minor;
if (ops && !ops->zero_page_range) {
pr_debug("%s: error: device does not provide dax"
" operation zero_page_range()\n",
__host ? __host : "Unknown");
if (WARN_ON_ONCE(ops && !ops->zero_page_range))
return ERR_PTR(-EINVAL);
}
host = kstrdup(__host, GFP_KERNEL);
if (__host && !host)
return ERR_PTR(-ENOMEM);
minor = ida_simple_get(&dax_minor_ida, 0, MINORMASK+1, GFP_KERNEL);
if (minor < 0)
goto err_minor;
return ERR_PTR(-ENOMEM);
devt = MKDEV(MAJOR(dax_devt), minor);
dax_dev = dax_dev_get(devt);
if (!dax_dev)
goto err_dev;
dax_add_host(dax_dev, host);
dax_dev->ops = ops;
dax_dev->private = private;
if (flags & DAXDEV_F_SYNC)
set_dax_synchronous(dax_dev);
return dax_dev;
err_dev:
ida_simple_remove(&dax_minor_ida, minor);
err_minor:
kfree(host);
return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL_GPL(alloc_dax);
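
On the consumer side of the interface above (a hedged sketch; the filesystem
and field names are illustrative), a filesystem now looks up the dax_device
once, records the partition offset that fs_dax_get_by_bdev() reports, and
applies that offset itself when computing page offsets for
dax_direct_access():

#include <linux/fs.h>
#include <linux/dax.h>

struct example_sb_info {
	struct dax_device *dax_dev;
	u64 dax_part_off;	/* byte offset of this bdev on the dax_device */
};

static int example_fill_super(struct super_block *sb,
		struct example_sb_info *sbi)
{
	/* returns NULL when the disk has no dax_device or the partition is
	 * not page aligned, replacing the old dax_supported() checks */
	sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->dax_part_off);
	return sbi->dax_dev ? 0 : -EOPNOTSUPP;
}

static long example_direct_access(struct example_sb_info *sbi, loff_t pos,
		long nr_pages, void **kaddr, pfn_t *pfn)
{
	/* the caller, not the dax core, applies the partition offset now */
	pgoff_t pgoff = PHYS_PFN(sbi->dax_part_off + pos);

	return dax_direct_access(sbi->dax_dev, pgoff, nr_pages, kaddr, pfn);
}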


@ -162,71 +162,34 @@ static int linear_iterate_devices(struct dm_target *ti,
return fn(ti, lc->dev, lc->start, ti->len, data);
}
#if IS_ENABLED(CONFIG_DAX_DRIVER)
#if IS_ENABLED(CONFIG_FS_DAX)
static struct dax_device *linear_dax_pgoff(struct dm_target *ti, pgoff_t *pgoff)
{
struct linear_c *lc = ti->private;
sector_t sector = linear_map_sector(ti, *pgoff << PAGE_SECTORS_SHIFT);
*pgoff = (get_start_sect(lc->dev->bdev) + sector) >> PAGE_SECTORS_SHIFT;
return lc->dev->dax_dev;
}
static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
long nr_pages, void **kaddr, pfn_t *pfn)
{
long ret;
struct linear_c *lc = ti->private;
struct block_device *bdev = lc->dev->bdev;
struct dax_device *dax_dev = lc->dev->dax_dev;
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff);
dev_sector = linear_map_sector(ti, sector);
ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages * PAGE_SIZE, &pgoff);
if (ret)
return ret;
return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
}
static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
struct linear_c *lc = ti->private;
struct block_device *bdev = lc->dev->bdev;
struct dax_device *dax_dev = lc->dev->dax_dev;
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
dev_sector = linear_map_sector(ti, sector);
if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
return 0;
return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
}
static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
struct linear_c *lc = ti->private;
struct block_device *bdev = lc->dev->bdev;
struct dax_device *dax_dev = lc->dev->dax_dev;
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
dev_sector = linear_map_sector(ti, sector);
if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
return 0;
return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
}
static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
size_t nr_pages)
{
int ret;
struct linear_c *lc = ti->private;
struct block_device *bdev = lc->dev->bdev;
struct dax_device *dax_dev = lc->dev->dax_dev;
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff);
dev_sector = linear_map_sector(ti, sector);
ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages << PAGE_SHIFT, &pgoff);
if (ret)
return ret;
return dax_zero_page_range(dax_dev, pgoff, nr_pages);
}
#else
#define linear_dax_direct_access NULL
#define linear_dax_copy_from_iter NULL
#define linear_dax_copy_to_iter NULL
#define linear_dax_zero_page_range NULL
#endif
@ -244,8 +207,6 @@ static struct target_type linear_target = {
.prepare_ioctl = linear_prepare_ioctl,
.iterate_devices = linear_iterate_devices,
.direct_access = linear_dax_direct_access,
.dax_copy_from_iter = linear_dax_copy_from_iter,
.dax_copy_to_iter = linear_dax_copy_to_iter,
.dax_zero_page_range = linear_dax_zero_page_range,
};


@ -901,120 +901,34 @@ static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limit
limits->io_min = limits->physical_block_size;
}
#if IS_ENABLED(CONFIG_DAX_DRIVER)
static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes,
struct iov_iter *i)
#if IS_ENABLED(CONFIG_FS_DAX)
static struct dax_device *log_writes_dax_pgoff(struct dm_target *ti,
pgoff_t *pgoff)
{
struct pending_block *block;
struct log_writes_c *lc = ti->private;
if (!bytes)
return 0;
block = kzalloc(sizeof(struct pending_block), GFP_KERNEL);
if (!block) {
DMERR("Error allocating dax pending block");
return -ENOMEM;
}
block->data = kzalloc(bytes, GFP_KERNEL);
if (!block->data) {
DMERR("Error allocating dax data space");
kfree(block);
return -ENOMEM;
}
/* write data provided via the iterator */
if (!copy_from_iter(block->data, bytes, i)) {
DMERR("Error copying dax data");
kfree(block->data);
kfree(block);
return -EIO;
}
/* rewind the iterator so that the block driver can use it */
iov_iter_revert(i, bytes);
block->datalen = bytes;
block->sector = bio_to_dev_sectors(lc, sector);
block->nr_sectors = ALIGN(bytes, lc->sectorsize) >> lc->sectorshift;
atomic_inc(&lc->pending_blocks);
spin_lock_irq(&lc->blocks_lock);
list_add_tail(&block->list, &lc->unflushed_blocks);
spin_unlock_irq(&lc->blocks_lock);
wake_up_process(lc->log_kthread);
return 0;
*pgoff += (get_start_sect(lc->dev->bdev) >> PAGE_SECTORS_SHIFT);
return lc->dev->dax_dev;
}
static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
long nr_pages, void **kaddr, pfn_t *pfn)
{
struct log_writes_c *lc = ti->private;
sector_t sector = pgoff * PAGE_SECTORS;
int ret;
struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff);
ret = bdev_dax_pgoff(lc->dev->bdev, sector, nr_pages * PAGE_SIZE, &pgoff);
if (ret)
return ret;
return dax_direct_access(lc->dev->dax_dev, pgoff, nr_pages, kaddr, pfn);
}
static size_t log_writes_dax_copy_from_iter(struct dm_target *ti,
pgoff_t pgoff, void *addr, size_t bytes,
struct iov_iter *i)
{
struct log_writes_c *lc = ti->private;
sector_t sector = pgoff * PAGE_SECTORS;
int err;
if (bdev_dax_pgoff(lc->dev->bdev, sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
return 0;
/* Don't bother doing anything if logging has been disabled */
if (!lc->logging_enabled)
goto dax_copy;
err = log_dax(lc, sector, bytes, i);
if (err) {
DMWARN("Error %d logging DAX write", err);
return 0;
}
dax_copy:
return dax_copy_from_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
}
static size_t log_writes_dax_copy_to_iter(struct dm_target *ti,
pgoff_t pgoff, void *addr, size_t bytes,
struct iov_iter *i)
{
struct log_writes_c *lc = ti->private;
sector_t sector = pgoff * PAGE_SECTORS;
if (bdev_dax_pgoff(lc->dev->bdev, sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
return 0;
return dax_copy_to_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
}
static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
size_t nr_pages)
{
int ret;
struct log_writes_c *lc = ti->private;
sector_t sector = pgoff * PAGE_SECTORS;
struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff);
ret = bdev_dax_pgoff(lc->dev->bdev, sector, nr_pages << PAGE_SHIFT,
&pgoff);
if (ret)
return ret;
return dax_zero_page_range(lc->dev->dax_dev, pgoff,
nr_pages << PAGE_SHIFT);
return dax_zero_page_range(dax_dev, pgoff, nr_pages << PAGE_SHIFT);
}
#else
#define log_writes_dax_direct_access NULL
#define log_writes_dax_copy_from_iter NULL
#define log_writes_dax_copy_to_iter NULL
#define log_writes_dax_zero_page_range NULL
#endif
@ -1032,8 +946,6 @@ static struct target_type log_writes_target = {
.iterate_devices = log_writes_iterate_devices,
.io_hints = log_writes_io_hints,
.direct_access = log_writes_dax_direct_access,
.dax_copy_from_iter = log_writes_dax_copy_from_iter,
.dax_copy_to_iter = log_writes_dax_copy_to_iter,
.dax_zero_page_range = log_writes_dax_zero_page_range,
};


@ -300,91 +300,40 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_REMAPPED;
}
#if IS_ENABLED(CONFIG_DAX_DRIVER)
#if IS_ENABLED(CONFIG_FS_DAX)
static struct dax_device *stripe_dax_pgoff(struct dm_target *ti, pgoff_t *pgoff)
{
struct stripe_c *sc = ti->private;
struct block_device *bdev;
sector_t dev_sector;
uint32_t stripe;
stripe_map_sector(sc, *pgoff * PAGE_SECTORS, &stripe, &dev_sector);
dev_sector += sc->stripe[stripe].physical_start;
bdev = sc->stripe[stripe].dev->bdev;
*pgoff = (get_start_sect(bdev) + dev_sector) >> PAGE_SECTORS_SHIFT;
return sc->stripe[stripe].dev->dax_dev;
}
static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
long nr_pages, void **kaddr, pfn_t *pfn)
{
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
struct stripe_c *sc = ti->private;
struct dax_device *dax_dev;
struct block_device *bdev;
uint32_t stripe;
long ret;
struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff);
stripe_map_sector(sc, sector, &stripe, &dev_sector);
dev_sector += sc->stripe[stripe].physical_start;
dax_dev = sc->stripe[stripe].dev->dax_dev;
bdev = sc->stripe[stripe].dev->bdev;
ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages * PAGE_SIZE, &pgoff);
if (ret)
return ret;
return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
}
static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
struct stripe_c *sc = ti->private;
struct dax_device *dax_dev;
struct block_device *bdev;
uint32_t stripe;
stripe_map_sector(sc, sector, &stripe, &dev_sector);
dev_sector += sc->stripe[stripe].physical_start;
dax_dev = sc->stripe[stripe].dev->dax_dev;
bdev = sc->stripe[stripe].dev->bdev;
if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
return 0;
return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
}
static size_t stripe_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
struct stripe_c *sc = ti->private;
struct dax_device *dax_dev;
struct block_device *bdev;
uint32_t stripe;
stripe_map_sector(sc, sector, &stripe, &dev_sector);
dev_sector += sc->stripe[stripe].physical_start;
dax_dev = sc->stripe[stripe].dev->dax_dev;
bdev = sc->stripe[stripe].dev->bdev;
if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
return 0;
return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
}
static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
size_t nr_pages)
{
int ret;
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
struct stripe_c *sc = ti->private;
struct dax_device *dax_dev;
struct block_device *bdev;
uint32_t stripe;
struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff);
stripe_map_sector(sc, sector, &stripe, &dev_sector);
dev_sector += sc->stripe[stripe].physical_start;
dax_dev = sc->stripe[stripe].dev->dax_dev;
bdev = sc->stripe[stripe].dev->bdev;
ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages << PAGE_SHIFT, &pgoff);
if (ret)
return ret;
return dax_zero_page_range(dax_dev, pgoff, nr_pages);
}
#else
#define stripe_dax_direct_access NULL
#define stripe_dax_copy_from_iter NULL
#define stripe_dax_copy_to_iter NULL
#define stripe_dax_zero_page_range NULL
#endif
@ -521,8 +470,6 @@ static struct target_type stripe_target = {
.iterate_devices = stripe_iterate_devices,
.io_hints = stripe_io_hints,
.direct_access = stripe_dax_direct_access,
.dax_copy_from_iter = stripe_dax_copy_from_iter,
.dax_copy_to_iter = stripe_dax_copy_to_iter,
.dax_zero_page_range = stripe_dax_zero_page_range,
};


@ -806,12 +806,14 @@ void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type)
EXPORT_SYMBOL_GPL(dm_table_set_type);
/* validate the dax capability of the target device span */
int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
static int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data)
{
int blocksize = *(int *) data;
if (dev->dax_dev)
return false;
return !dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len);
DMDEBUG("%pg: error: dax unsupported by block device", dev->bdev);
return true;
}
/* Check devices support synchronous DAX */
@ -821,8 +823,8 @@ static int device_not_dax_synchronous_capable(struct dm_target *ti, struct dm_de
return !dev->dax_dev || !dax_synchronous(dev->dax_dev);
}
bool dm_table_supports_dax(struct dm_table *t,
iterate_devices_callout_fn iterate_fn, int *blocksize)
static bool dm_table_supports_dax(struct dm_table *t,
iterate_devices_callout_fn iterate_fn)
{
struct dm_target *ti;
unsigned i;
@ -835,7 +837,7 @@ bool dm_table_supports_dax(struct dm_table *t,
return false;
if (!ti->type->iterate_devices ||
ti->type->iterate_devices(ti, iterate_fn, blocksize))
ti->type->iterate_devices(ti, iterate_fn, NULL))
return false;
}
@ -862,7 +864,6 @@ static int dm_table_determine_type(struct dm_table *t)
struct dm_target *tgt;
struct list_head *devices = dm_table_get_devices(t);
enum dm_queue_mode live_md_type = dm_get_md_type(t->md);
int page_size = PAGE_SIZE;
if (t->type != DM_TYPE_NONE) {
/* target already set the table's type */
@ -906,7 +907,7 @@ static int dm_table_determine_type(struct dm_table *t)
verify_bio_based:
/* We must use this table as bio-based */
t->type = DM_TYPE_BIO_BASED;
if (dm_table_supports_dax(t, device_not_dax_capable, &page_size) ||
if (dm_table_supports_dax(t, device_not_dax_capable) ||
(list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) {
t->type = DM_TYPE_DAX_BIO_BASED;
}
@ -1976,7 +1977,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *limits)
{
bool wc = false, fua = false;
int page_size = PAGE_SIZE;
int r;
/*
@ -2010,9 +2010,9 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
}
blk_queue_write_cache(q, wc, fua);
if (dm_table_supports_dax(t, device_not_dax_capable, &page_size)) {
if (dm_table_supports_dax(t, device_not_dax_capable)) {
blk_queue_flag_set(QUEUE_FLAG_DAX, q);
if (dm_table_supports_dax(t, device_not_dax_synchronous_capable, NULL))
if (dm_table_supports_dax(t, device_not_dax_synchronous_capable))
set_dax_synchronous(t->md->dax_dev);
}
else


@ -38,7 +38,7 @@
#define BITMAP_GRANULARITY PAGE_SIZE
#endif
#if IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && IS_ENABLED(CONFIG_DAX_DRIVER)
#if IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && IS_ENABLED(CONFIG_FS_DAX)
#define DM_WRITECACHE_HAS_PMEM
#endif


@ -637,7 +637,7 @@ static int open_table_device(struct table_device *td, dev_t dev,
struct mapped_device *md)
{
struct block_device *bdev;
u64 part_off;
int r;
BUG_ON(td->dm_dev.bdev);
@ -653,7 +653,7 @@ static int open_table_device(struct table_device *td, dev_t dev,
}
td->dm_dev.bdev = bdev;
td->dm_dev.dax_dev = fs_dax_get_by_bdev(bdev);
td->dm_dev.dax_dev = fs_dax_get_by_bdev(bdev, &part_off);
return 0;
}
@ -1027,74 +1027,6 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
return ret;
}
static bool dm_dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
int blocksize, sector_t start, sector_t len)
{
struct mapped_device *md = dax_get_private(dax_dev);
struct dm_table *map;
bool ret = false;
int srcu_idx;
map = dm_get_live_table(md, &srcu_idx);
if (!map)
goto out;
ret = dm_table_supports_dax(map, device_not_dax_capable, &blocksize);
out:
dm_put_live_table(md, srcu_idx);
return ret;
}
static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
struct mapped_device *md = dax_get_private(dax_dev);
sector_t sector = pgoff * PAGE_SECTORS;
struct dm_target *ti;
long ret = 0;
int srcu_idx;
ti = dm_dax_get_live_target(md, sector, &srcu_idx);
if (!ti)
goto out;
if (!ti->type->dax_copy_from_iter) {
ret = copy_from_iter(addr, bytes, i);
goto out;
}
ret = ti->type->dax_copy_from_iter(ti, pgoff, addr, bytes, i);
out:
dm_put_live_table(md, srcu_idx);
return ret;
}
static size_t dm_dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
struct mapped_device *md = dax_get_private(dax_dev);
sector_t sector = pgoff * PAGE_SECTORS;
struct dm_target *ti;
long ret = 0;
int srcu_idx;
ti = dm_dax_get_live_target(md, sector, &srcu_idx);
if (!ti)
goto out;
if (!ti->type->dax_copy_to_iter) {
ret = copy_to_iter(addr, bytes, i);
goto out;
}
ret = ti->type->dax_copy_to_iter(ti, pgoff, addr, bytes, i);
out:
dm_put_live_table(md, srcu_idx);
return ret;
}
static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
size_t nr_pages)
{
@ -1683,6 +1615,7 @@ static void cleanup_mapped_device(struct mapped_device *md)
bioset_exit(&md->io_bs);
if (md->dax_dev) {
dax_remove_host(md->disk);
kill_dax(md->dax_dev);
put_dax(md->dax_dev);
md->dax_dev = NULL;
@ -1784,10 +1717,15 @@ static struct mapped_device *alloc_dev(int minor)
md->disk->private_data = md;
sprintf(md->disk->disk_name, "dm-%d", minor);
if (IS_ENABLED(CONFIG_DAX_DRIVER)) {
md->dax_dev = alloc_dax(md, md->disk->disk_name,
&dm_dax_ops, 0);
if (IS_ERR(md->dax_dev))
if (IS_ENABLED(CONFIG_FS_DAX)) {
md->dax_dev = alloc_dax(md, &dm_dax_ops);
if (IS_ERR(md->dax_dev)) {
md->dax_dev = NULL;
goto bad;
}
set_dax_nocache(md->dax_dev);
set_dax_nomc(md->dax_dev);
if (dax_add_host(md->dax_dev, md->disk))
goto bad;
}
@ -3041,9 +2979,6 @@ static const struct block_device_operations dm_rq_blk_dops = {
static const struct dax_operations dm_dax_ops = {
.direct_access = dm_dax_direct_access,
.dax_supported = dm_dax_supported,
.copy_from_iter = dm_dax_copy_from_iter,
.copy_to_iter = dm_dax_copy_to_iter,
.zero_page_range = dm_dax_zero_page_range,
};


@ -73,10 +73,6 @@ bool dm_table_bio_based(struct dm_table *t);
bool dm_table_request_based(struct dm_table *t);
void dm_table_free_md_mempools(struct dm_table *t);
struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
bool dm_table_supports_dax(struct dm_table *t, iterate_devices_callout_fn fn,
int *blocksize);
int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data);
void dm_lock_md_type(struct mapped_device *md);
void dm_unlock_md_type(struct mapped_device *md);


@ -22,7 +22,7 @@ if LIBNVDIMM
config BLK_DEV_PMEM
tristate "PMEM: Persistent memory block device support"
default LIBNVDIMM
select DAX_DRIVER
select DAX
select ND_BTT if BTT
select ND_PFN if NVDIMM_PFN
help


@ -301,29 +301,8 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev,
return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn);
}
/*
* Use the 'no check' versions of copy_from_iter_flushcache() and
* copy_mc_to_iter() to bypass HARDENED_USERCOPY overhead. Bounds
* checking, both file offset and device offset, is handled by
* dax_iomap_actor()
*/
static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
return _copy_from_iter_flushcache(addr, bytes, i);
}
static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
return _copy_mc_to_iter(addr, bytes, i);
}
static const struct dax_operations pmem_dax_ops = {
.direct_access = pmem_dax_direct_access,
.dax_supported = generic_fsdax_supported,
.copy_from_iter = pmem_copy_from_iter,
.copy_to_iter = pmem_copy_to_iter,
.zero_page_range = pmem_dax_zero_page_range,
};
@ -379,6 +358,7 @@ static void pmem_release_disk(void *__pmem)
{
struct pmem_device *pmem = __pmem;
dax_remove_host(pmem->disk);
kill_dax(pmem->dax_dev);
put_dax(pmem->dax_dev);
del_gendisk(pmem->disk);
@ -402,7 +382,6 @@ static int pmem_attach_disk(struct device *dev,
struct gendisk *disk;
void *addr;
int rc;
unsigned long flags = 0UL;
pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
if (!pmem)
@ -495,19 +474,24 @@ static int pmem_attach_disk(struct device *dev,
nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_range);
disk->bb = &pmem->bb;
if (is_nvdimm_sync(nd_region))
flags = DAXDEV_F_SYNC;
dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags);
dax_dev = alloc_dax(pmem, &pmem_dax_ops);
if (IS_ERR(dax_dev)) {
rc = PTR_ERR(dax_dev);
goto out;
}
set_dax_nocache(dax_dev);
set_dax_nomc(dax_dev);
if (is_nvdimm_sync(nd_region))
set_dax_synchronous(dax_dev);
rc = dax_add_host(dax_dev, disk);
if (rc)
goto out_cleanup_dax;
dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
pmem->dax_dev = dax_dev;
rc = device_add_disk(dev, disk, pmem_attribute_groups);
if (rc)
goto out_cleanup_dax;
goto out_remove_host;
if (devm_add_action_or_reset(dev, pmem_release_disk, pmem))
return -ENOMEM;
@ -519,6 +503,8 @@ static int pmem_attach_disk(struct device *dev,
dev_warn(dev, "'badblocks' notification disabled\n");
return 0;
out_remove_host:
dax_remove_host(pmem->disk);
out_cleanup_dax:
kill_dax(pmem->dax_dev);
put_dax(pmem->dax_dev);


@ -219,7 +219,7 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
error = gen_pool_add_owner(p2pdma->pool, (unsigned long)addr,
pci_bus_address(pdev, bar) + offset,
range_len(&pgmap->range), dev_to_node(&pdev->dev),
pgmap->ref);
&pgmap->ref);
if (error)
goto pages_free;
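
The one-line change above follows from "memremap: remove support for external
pgmap refcounts" in this series: struct dev_pagemap now embeds its percpu_ref
instead of optionally pointing at a caller-provided one, so users pass the
address of the embedded field. A sketch of the resulting pattern (the function
is illustrative):

#include <linux/memremap.h>
#include <linux/percpu-refcount.h>

static void example_pin_pgmap(struct dev_pagemap *pgmap)
{
	/* pgmap->ref is an embedded struct percpu_ref, no longer a pointer */
	percpu_ref_get(&pgmap->ref);
	/* ... access device pages ... */
	percpu_ref_put(&pgmap->ref);
}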


@ -5,7 +5,7 @@ comment "S/390 block device drivers"
config DCSSBLK
def_tristate m
select FS_DAX_LIMITED
select DAX_DRIVER
select DAX
prompt "DCSSBLK support"
depends on S390 && BLOCK
help


@ -44,18 +44,6 @@ static const struct block_device_operations dcssblk_devops = {
.release = dcssblk_release,
};
static size_t dcssblk_dax_copy_from_iter(struct dax_device *dax_dev,
pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i)
{
return copy_from_iter(addr, bytes, i);
}
static size_t dcssblk_dax_copy_to_iter(struct dax_device *dax_dev,
pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i)
{
return copy_to_iter(addr, bytes, i);
}
static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev,
pgoff_t pgoff, size_t nr_pages)
{
@ -72,9 +60,6 @@ static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev,
static const struct dax_operations dcssblk_dax_ops = {
.direct_access = dcssblk_dax_direct_access,
.dax_supported = generic_fsdax_supported,
.copy_from_iter = dcssblk_dax_copy_from_iter,
.copy_to_iter = dcssblk_dax_copy_to_iter,
.zero_page_range = dcssblk_dax_zero_page_range,
};
@ -687,18 +672,21 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
if (rc)
goto put_dev;
dev_info->dax_dev = alloc_dax(dev_info, dev_info->gd->disk_name,
&dcssblk_dax_ops, DAXDEV_F_SYNC);
dev_info->dax_dev = alloc_dax(dev_info, &dcssblk_dax_ops);
if (IS_ERR(dev_info->dax_dev)) {
rc = PTR_ERR(dev_info->dax_dev);
dev_info->dax_dev = NULL;
goto put_dev;
}
set_dax_synchronous(dev_info->dax_dev);
rc = dax_add_host(dev_info->dax_dev, dev_info->gd);
if (rc)
goto out_dax;
get_device(&dev_info->dev);
rc = device_add_disk(&dev_info->dev, dev_info->gd, NULL);
if (rc)
goto out_dax;
goto out_dax_host;
switch (dev_info->segment_type) {
case SEG_TYPE_SR:
@ -714,6 +702,8 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
rc = count;
goto out;
out_dax_host:
dax_remove_host(dev_info->gd);
out_dax:
put_device(&dev_info->dev);
kill_dax(dev_info->dax_dev);


@ -15,11 +15,11 @@ config VALIDATE_FS_PARSER
Enable this to perform validation of the parameter description for a
filesystem when it is registered.
if BLOCK
config FS_IOMAP
bool
if BLOCK
source "fs/ext2/Kconfig"
source "fs/ext4/Kconfig"
source "fs/jbd2/Kconfig"
@ -42,6 +42,8 @@ source "fs/nilfs2/Kconfig"
source "fs/f2fs/Kconfig"
source "fs/zonefs/Kconfig"
endif # BLOCK
config FS_DAX
bool "File system based Direct Access (DAX) support"
depends on MMU
@ -89,8 +91,6 @@ config FS_DAX_PMD
config FS_DAX_LIMITED
bool
endif # BLOCK
# Posix ACL utility routines
#
# Note: Posix ACLs can be implemented without these helpers. Never use

fs/dax.c

@ -709,26 +709,26 @@ int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
return __dax_invalidate_entry(mapping, index, false);
}
static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_dev,
sector_t sector, struct page *to, unsigned long vaddr)
static pgoff_t dax_iomap_pgoff(const struct iomap *iomap, loff_t pos)
{
return PHYS_PFN(iomap->addr + (pos & PAGE_MASK) - iomap->offset);
}
static int copy_cow_page_dax(struct vm_fault *vmf, const struct iomap_iter *iter)
{
pgoff_t pgoff = dax_iomap_pgoff(&iter->iomap, iter->pos);
void *vto, *kaddr;
pgoff_t pgoff;
long rc;
int id;
rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff);
if (rc)
return rc;
id = dax_read_lock();
rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
rc = dax_direct_access(iter->iomap.dax_dev, pgoff, 1, &kaddr, NULL);
if (rc < 0) {
dax_read_unlock(id);
return rc;
}
vto = kmap_atomic(to);
copy_user_page(vto, (void __force *)kaddr, vaddr, to);
vto = kmap_atomic(vmf->cow_page);
copy_user_page(vto, kaddr, vmf->address, vmf->cow_page);
kunmap_atomic(vto);
dax_read_unlock(id);
return 0;
@ -1005,22 +1005,13 @@ int dax_writeback_mapping_range(struct address_space *mapping,
}
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
static sector_t dax_iomap_sector(const struct iomap *iomap, loff_t pos)
{
return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
}
static int dax_iomap_pfn(const struct iomap *iomap, loff_t pos, size_t size,
pfn_t *pfnp)
{
const sector_t sector = dax_iomap_sector(iomap, pos);
pgoff_t pgoff;
pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
int id, rc;
long length;
rc = bdev_dax_pgoff(iomap->bdev, sector, size, &pgoff);
if (rc)
return rc;
id = dax_read_lock();
length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
NULL, pfnp);
@ -1126,50 +1117,94 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
}
#endif /* CONFIG_FS_DAX_PMD */
s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap)
static int dax_memzero(struct dax_device *dax_dev, pgoff_t pgoff,
unsigned int offset, size_t size)
{
sector_t sector = iomap_sector(iomap, pos & PAGE_MASK);
pgoff_t pgoff;
long rc, id;
void *kaddr;
bool page_aligned = false;
unsigned offset = offset_in_page(pos);
unsigned size = min_t(u64, PAGE_SIZE - offset, length);
long ret;
if (IS_ALIGNED(sector << SECTOR_SHIFT, PAGE_SIZE) &&
(size == PAGE_SIZE))
page_aligned = true;
rc = bdev_dax_pgoff(iomap->bdev, sector, PAGE_SIZE, &pgoff);
if (rc)
return rc;
id = dax_read_lock();
if (page_aligned)
rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
else
rc = dax_direct_access(iomap->dax_dev, pgoff, 1, &kaddr, NULL);
if (rc < 0) {
dax_read_unlock(id);
return rc;
}
if (!page_aligned) {
ret = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
if (ret > 0) {
memset(kaddr + offset, 0, size);
dax_flush(iomap->dax_dev, kaddr + offset, size);
dax_flush(dax_dev, kaddr + offset, size);
}
dax_read_unlock(id);
return size;
return ret;
}
static s64 dax_zero_iter(struct iomap_iter *iter, bool *did_zero)
{
const struct iomap *iomap = &iter->iomap;
const struct iomap *srcmap = iomap_iter_srcmap(iter);
loff_t pos = iter->pos;
u64 length = iomap_length(iter);
s64 written = 0;
/* already zeroed? we're done. */
if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
return length;
do {
unsigned offset = offset_in_page(pos);
unsigned size = min_t(u64, PAGE_SIZE - offset, length);
pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
long rc;
int id;
id = dax_read_lock();
if (IS_ALIGNED(pos, PAGE_SIZE) && size == PAGE_SIZE)
rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
else
rc = dax_memzero(iomap->dax_dev, pgoff, offset, size);
dax_read_unlock(id);
if (rc < 0)
return rc;
pos += size;
length -= size;
written += size;
if (did_zero)
*did_zero = true;
} while (length > 0);
return written;
}
int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
const struct iomap_ops *ops)
{
struct iomap_iter iter = {
.inode = inode,
.pos = pos,
.len = len,
.flags = IOMAP_DAX | IOMAP_ZERO,
};
int ret;
while ((ret = iomap_iter(&iter, ops)) > 0)
iter.processed = dax_zero_iter(&iter, did_zero);
return ret;
}
EXPORT_SYMBOL_GPL(dax_zero_range);
int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
const struct iomap_ops *ops)
{
unsigned int blocksize = i_blocksize(inode);
unsigned int off = pos & (blocksize - 1);
/* Block boundary? Nothing to do */
if (!off)
return 0;
return dax_zero_range(inode, pos, blocksize - off, did_zero, ops);
}
EXPORT_SYMBOL_GPL(dax_truncate_page);
static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
struct iov_iter *iter)
{
const struct iomap *iomap = &iomi->iomap;
loff_t length = iomap_length(iomi);
loff_t pos = iomi->pos;
struct block_device *bdev = iomap->bdev;
struct dax_device *dax_dev = iomap->dax_dev;
loff_t end = pos + length, done = 0;
ssize_t ret = 0;
@ -1203,9 +1238,8 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
while (pos < end) {
unsigned offset = pos & (PAGE_SIZE - 1);
const size_t size = ALIGN(length + offset, PAGE_SIZE);
const sector_t sector = dax_iomap_sector(iomap, pos);
pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
ssize_t map_len;
pgoff_t pgoff;
void *kaddr;
if (fatal_signal_pending(current)) {
@ -1213,10 +1247,6 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
break;
}
ret = bdev_dax_pgoff(bdev, sector, size, &pgoff);
if (ret)
break;
map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
&kaddr, NULL);
if (map_len < 0) {
@ -1230,11 +1260,6 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
if (map_len > end - pos)
map_len = end - pos;
/*
* The userspace address for the memory copy has already been
* validated via access_ok() in either vfs_read() or
* vfs_write(), depending on which operation we are doing.
*/
if (iov_iter_rw(iter) == WRITE)
xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr,
map_len, iter);
@ -1274,6 +1299,7 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
.inode = iocb->ki_filp->f_mapping->host,
.pos = iocb->ki_pos,
.len = iov_iter_count(iter),
.flags = IOMAP_DAX,
};
loff_t done = 0;
int ret;
@ -1332,19 +1358,16 @@ static vm_fault_t dax_fault_synchronous_pfnp(pfn_t *pfnp, pfn_t pfn)
static vm_fault_t dax_fault_cow_page(struct vm_fault *vmf,
const struct iomap_iter *iter)
{
sector_t sector = dax_iomap_sector(&iter->iomap, iter->pos);
unsigned long vaddr = vmf->address;
vm_fault_t ret;
int error = 0;
switch (iter->iomap.type) {
case IOMAP_HOLE:
case IOMAP_UNWRITTEN:
clear_user_highpage(vmf->cow_page, vaddr);
clear_user_highpage(vmf->cow_page, vmf->address);
break;
case IOMAP_MAPPED:
error = copy_cow_page_dax(iter->iomap.bdev, iter->iomap.dax_dev,
sector, vmf->cow_page, vaddr);
error = copy_cow_page_dax(vmf, iter);
break;
default:
WARN_ON_ONCE(1);
@ -1430,7 +1453,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
.inode = mapping->host,
.pos = (loff_t)vmf->pgoff << PAGE_SHIFT,
.len = PAGE_SIZE,
.flags = IOMAP_FAULT,
.flags = IOMAP_DAX | IOMAP_FAULT,
};
vm_fault_t ret = 0;
void *entry;
@ -1539,7 +1562,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
struct iomap_iter iter = {
.inode = mapping->host,
.len = PMD_SIZE,
.flags = IOMAP_FAULT,
.flags = IOMAP_DAX | IOMAP_FAULT,
};
vm_fault_t ret = VM_FAULT_FALLBACK;
pgoff_t max_pgoff;
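
With zeroing decoupled from the block-based iomap buffered-I/O path, DAX
filesystems call the new helpers above directly in place of
iomap_zero_range() and iomap_truncate_page(); compare the ext2 conversion
below. A brief sketch (the ops and function names are illustrative):

#include <linux/fs.h>
#include <linux/dax.h>

static int example_setsize(struct inode *inode, loff_t newsize,
		const struct iomap_ops *ops)
{
	/* zero the tail of the EOF page so stale data is not exposed */
	if (IS_DAX(inode))
		return dax_zero_range(inode, newsize,
				PAGE_ALIGN(newsize) - newsize, NULL, ops);
	return 0;
}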


@ -192,6 +192,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
/* primary device by default */
map->m_bdev = sb->s_bdev;
map->m_daxdev = EROFS_SB(sb)->dax_dev;
map->m_dax_part_off = EROFS_SB(sb)->dax_part_off;
if (map->m_deviceid) {
down_read(&devs->rwsem);
@ -202,6 +203,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
}
map->m_bdev = dif->bdev;
map->m_daxdev = dif->dax_dev;
map->m_dax_part_off = dif->dax_part_off;
up_read(&devs->rwsem);
} else if (devs->extra_devices) {
down_read(&devs->rwsem);
@ -218,6 +220,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
map->m_pa -= startoff;
map->m_bdev = dif->bdev;
map->m_daxdev = dif->dax_dev;
map->m_dax_part_off = dif->dax_part_off;
break;
}
}
@ -248,9 +251,13 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
if (ret)
return ret;
iomap->bdev = mdev.m_bdev;
iomap->dax_dev = mdev.m_daxdev;
iomap->offset = map.m_la;
if (flags & IOMAP_DAX) {
iomap->dax_dev = mdev.m_daxdev;
iomap->offset += mdev.m_dax_part_off;
} else {
iomap->bdev = mdev.m_bdev;
}
iomap->length = map.m_llen;
iomap->flags = 0;
iomap->private = NULL;


@ -51,6 +51,7 @@ struct erofs_device_info {
char *path;
struct block_device *bdev;
struct dax_device *dax_dev;
u64 dax_part_off;
u32 blocks;
u32 mapped_blkaddr;
@ -115,6 +116,7 @@ struct erofs_sb_info {
#endif /* CONFIG_EROFS_FS_ZIP */
struct erofs_dev_context *devs;
struct dax_device *dax_dev;
u64 dax_part_off;
u64 total_blocks;
u32 primarydevice_blocks;
@ -467,6 +469,7 @@ static inline int z_erofs_map_blocks_iter(struct inode *inode,
struct erofs_map_dev {
struct block_device *m_bdev;
struct dax_device *m_daxdev;
u64 m_dax_part_off;
erofs_off_t m_pa;
unsigned int m_deviceid;


@@ -267,7 +267,7 @@ static int erofs_init_devices(struct super_block *sb,
break;
}
dif->bdev = bdev;
dif->dax_dev = fs_dax_get_by_bdev(bdev);
dif->dax_dev = fs_dax_get_by_bdev(bdev, &dif->dax_part_off);
dif->blocks = le32_to_cpu(dis->blocks);
dif->mapped_blkaddr = le32_to_cpu(dis->mapped_blkaddr);
sbi->total_blocks += dif->blocks;
@@ -597,7 +597,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_fs_info = sbi;
sbi->opt = ctx->opt;
sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->dax_part_off);
sbi->devs = ctx->devs;
ctx->devs = NULL;
@@ -605,10 +605,13 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
if (err)
return err;
if (test_opt(&sbi->opt, DAX_ALWAYS) &&
!dax_supported(sbi->dax_dev, sb->s_bdev, EROFS_BLKSIZ, 0, bdev_nr_sectors(sb->s_bdev))) {
errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
clear_opt(&sbi->opt, DAX_ALWAYS);
if (test_opt(&sbi->opt, DAX_ALWAYS)) {
BUILD_BUG_ON(EROFS_BLKSIZ != PAGE_SIZE);
if (!sbi->dax_dev) {
errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
clear_opt(&sbi->opt, DAX_ALWAYS);
}
}
sb->s_flags |= SB_RDONLY | SB_NOATIME;
sb->s_maxbytes = MAX_LFS_FILESIZE;

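The super-block side of the same conversion, sketched with the placeholder example_sb_info from the note above: fs_dax_get_by_bdev() now reports the partition's byte offset up front, and a NULL return stands in for the removed dax_supported() probe.

    #include <linux/dax.h>
    #include <linux/fs.h>
    #include <linux/printk.h>

    static void example_setup_dax(struct super_block *sb,
                                  struct example_sb_info *sbi)
    {
            /* One call hands back the dax_device and the partition's byte
             * offset on it; both are cached for later ->iomap_begin use. */
            sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev,
                                               &sbi->s_dax_part_off);
            if (!sbi->s_daxdev)
                    /* NULL now means "no usable DAX device"; the converted
                     * filesystems clear their DAX mount option here. */
                    pr_warn("DAX unsupported by block device\n");
    }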

@@ -118,6 +118,7 @@ struct ext2_sb_info {
spinlock_t s_lock;
struct mb_cache *s_ea_block_cache;
struct dax_device *s_daxdev;
u64 s_dax_part_off;
};
static inline spinlock_t *


@@ -36,6 +36,7 @@
#include <linux/iomap.h>
#include <linux/namei.h>
#include <linux/uio.h>
#include <linux/dax.h>
#include "ext2.h"
#include "acl.h"
#include "xattr.h"
@@ -816,9 +817,11 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
return ret;
iomap->flags = 0;
iomap->bdev = inode->i_sb->s_bdev;
iomap->offset = (u64)first_block << blkbits;
iomap->dax_dev = sbi->s_daxdev;
if (flags & IOMAP_DAX)
iomap->dax_dev = sbi->s_daxdev;
else
iomap->bdev = inode->i_sb->s_bdev;
if (ret == 0) {
iomap->type = IOMAP_HOLE;
@@ -827,6 +830,8 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
} else {
iomap->type = IOMAP_MAPPED;
iomap->addr = (u64)bno << blkbits;
if (flags & IOMAP_DAX)
iomap->addr += sbi->s_dax_part_off;
iomap->length = (u64)ret << blkbits;
iomap->flags |= IOMAP_F_MERGED;
}
@@ -1297,9 +1302,9 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
inode_dio_wait(inode);
if (IS_DAX(inode)) {
error = iomap_zero_range(inode, newsize,
PAGE_ALIGN(newsize) - newsize, NULL,
&ext2_iomap_ops);
error = dax_zero_range(inode, newsize,
PAGE_ALIGN(newsize) - newsize, NULL,
&ext2_iomap_ops);
} else if (test_opt(inode->i_sb, NOBH))
error = nobh_truncate_page(inode->i_mapping,
newsize, ext2_get_block);

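The rule the ext2_setsize() hunk applies, in isolation: DAX inodes now zero through dax_zero_range() rather than riding the page-cache based iomap_zero_range() path. A sketch with a placeholder ops table:

    #include <linux/dax.h>
    #include <linux/fs.h>
    #include <linux/iomap.h>
    #include <linux/mm.h>

    extern const struct iomap_ops example_iomap_ops;    /* placeholder */

    static int example_zero_tail(struct inode *inode, loff_t newsize)
    {
            loff_t len = PAGE_ALIGN(newsize) - newsize;

            /* DAX zeroes through the directly mapped kernel address. */
            if (IS_DAX(inode))
                    return dax_zero_range(inode, newsize, len, NULL,
                                          &example_iomap_ops);
            return iomap_zero_range(inode, newsize, len, NULL,
                                    &example_iomap_ops);
    }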

@@ -802,7 +802,6 @@ static unsigned long descriptor_loc(struct super_block *sb,
static int ext2_fill_super(struct super_block *sb, void *data, int silent)
{
struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
struct buffer_head * bh;
struct ext2_sb_info * sbi;
struct ext2_super_block * es;
@@ -822,17 +821,17 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
goto failed;
return -ENOMEM;
sbi->s_blockgroup_lock =
kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
if (!sbi->s_blockgroup_lock) {
kfree(sbi);
goto failed;
return -ENOMEM;
}
sb->s_fs_info = sbi;
sbi->s_sb_block = sb_block;
sbi->s_daxdev = dax_dev;
sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off);
spin_lock_init(&sbi->s_lock);
ret = -EINVAL;
@@ -946,11 +945,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
if (test_opt(sb, DAX)) {
if (!dax_supported(dax_dev, sb->s_bdev, blocksize, 0,
bdev_nr_sectors(sb->s_bdev))) {
if (!sbi->s_daxdev) {
ext2_msg(sb, KERN_ERR,
"DAX unsupported by block device. Turning off DAX.");
clear_opt(sbi->s_mount_opt, DAX);
} else if (blocksize != PAGE_SIZE) {
ext2_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n");
clear_opt(sbi->s_mount_opt, DAX);
}
}
@@ -1199,11 +1200,10 @@ failed_mount_group_desc:
failed_mount:
brelse(bh);
failed_sbi:
fs_put_dax(sbi->s_daxdev);
sb->s_fs_info = NULL;
kfree(sbi->s_blockgroup_lock);
kfree(sbi);
failed:
fs_put_dax(dax_dev);
return ret;
}


@@ -1699,6 +1699,7 @@ struct ext4_sb_info {
*/
struct percpu_rw_semaphore s_writepages_rwsem;
struct dax_device *s_daxdev;
u64 s_dax_part_off;
#ifdef CONFIG_EXT4_DEBUG
unsigned long s_simulate_fail;
#endif


@@ -41,6 +41,7 @@
#include <linux/bitops.h>
#include <linux/iomap.h>
#include <linux/iversion.h>
#include <linux/dax.h>
#include "ext4_jbd2.h"
#include "xattr.h"
@@ -3253,7 +3254,7 @@ static bool ext4_inode_datasync_dirty(struct inode *inode)
static void ext4_set_iomap(struct inode *inode, struct iomap *iomap,
struct ext4_map_blocks *map, loff_t offset,
loff_t length)
loff_t length, unsigned int flags)
{
u8 blkbits = inode->i_blkbits;
@@ -3270,8 +3271,10 @@ static void ext4_set_iomap(struct inode *inode, struct iomap *iomap,
if (map->m_flags & EXT4_MAP_NEW)
iomap->flags |= IOMAP_F_NEW;
iomap->bdev = inode->i_sb->s_bdev;
iomap->dax_dev = EXT4_SB(inode->i_sb)->s_daxdev;
if (flags & IOMAP_DAX)
iomap->dax_dev = EXT4_SB(inode->i_sb)->s_daxdev;
else
iomap->bdev = inode->i_sb->s_bdev;
iomap->offset = (u64) map->m_lblk << blkbits;
iomap->length = (u64) map->m_len << blkbits;
@@ -3291,9 +3294,13 @@ static void ext4_set_iomap(struct inode *inode, struct iomap *iomap,
if (map->m_flags & EXT4_MAP_UNWRITTEN) {
iomap->type = IOMAP_UNWRITTEN;
iomap->addr = (u64) map->m_pblk << blkbits;
if (flags & IOMAP_DAX)
iomap->addr += EXT4_SB(inode->i_sb)->s_dax_part_off;
} else if (map->m_flags & EXT4_MAP_MAPPED) {
iomap->type = IOMAP_MAPPED;
iomap->addr = (u64) map->m_pblk << blkbits;
if (flags & IOMAP_DAX)
iomap->addr += EXT4_SB(inode->i_sb)->s_dax_part_off;
} else {
iomap->type = IOMAP_HOLE;
iomap->addr = IOMAP_NULL_ADDR;
@@ -3330,8 +3337,8 @@ retry:
* DAX and direct I/O are the only two operations that are currently
* supported with IOMAP_WRITE.
*/
WARN_ON(!IS_DAX(inode) && !(flags & IOMAP_DIRECT));
if (IS_DAX(inode))
WARN_ON(!(flags & (IOMAP_DAX | IOMAP_DIRECT)));
if (flags & IOMAP_DAX)
m_flags = EXT4_GET_BLOCKS_CREATE_ZERO;
/*
* We use i_size instead of i_disksize here because delalloc writeback
@@ -3402,7 +3409,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
if (ret < 0)
return ret;
out:
ext4_set_iomap(inode, iomap, &map, offset, length);
ext4_set_iomap(inode, iomap, &map, offset, length, flags);
return 0;
}
@@ -3522,7 +3529,7 @@ static int ext4_iomap_begin_report(struct inode *inode, loff_t offset,
delalloc = ext4_iomap_is_delalloc(inode, &map);
set_iomap:
ext4_set_iomap(inode, iomap, &map, offset, length);
ext4_set_iomap(inode, iomap, &map, offset, length, flags);
if (delalloc && iomap->type == IOMAP_HOLE)
iomap->type = IOMAP_DELALLOC;
@@ -3762,8 +3769,8 @@ static int ext4_block_zero_page_range(handle_t *handle,
length = max;
if (IS_DAX(inode)) {
return iomap_zero_range(inode, from, length, NULL,
&ext4_iomap_ops);
return dax_zero_range(inode, from, length, NULL,
&ext4_iomap_ops);
}
return __ext4_block_zero_page_range(handle, mapping, from, length);
}


@@ -4338,7 +4338,7 @@ static struct ext4_sb_info *ext4_alloc_sbi(struct super_block *sb)
if (!sbi)
return NULL;
sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev);
sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off);
sbi->s_blockgroup_lock =
kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
@@ -4756,9 +4756,12 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
goto failed_mount;
}
if (dax_supported(sbi->s_daxdev, sb->s_bdev, blocksize, 0,
bdev_nr_sectors(sb->s_bdev)))
set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
if (sbi->s_daxdev) {
if (blocksize == PAGE_SIZE)
set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
else
ext4_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n");
}
if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
if (ext4_has_feature_inline_data(sb)) {


@@ -45,7 +45,7 @@ config FUSE_DAX
select INTERVAL_TREE
depends on VIRTIO_FS
depends on FS_DAX
depends on DAX_DRIVER
depends on DAX
help
This allows bypassing guest page cache and allows mapping host page
cache directly in guest address space.


@@ -765,20 +765,6 @@ static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
return nr_pages > max_nr_pages ? max_nr_pages : nr_pages;
}
static size_t virtio_fs_copy_from_iter(struct dax_device *dax_dev,
pgoff_t pgoff, void *addr,
size_t bytes, struct iov_iter *i)
{
return copy_from_iter(addr, bytes, i);
}
static size_t virtio_fs_copy_to_iter(struct dax_device *dax_dev,
pgoff_t pgoff, void *addr,
size_t bytes, struct iov_iter *i)
{
return copy_to_iter(addr, bytes, i);
}
static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
pgoff_t pgoff, size_t nr_pages)
{
@@ -795,8 +781,6 @@ static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
static const struct dax_operations virtio_fs_dax_ops = {
.direct_access = virtio_fs_direct_access,
.copy_from_iter = virtio_fs_copy_from_iter,
.copy_to_iter = virtio_fs_copy_to_iter,
.zero_page_range = virtio_fs_zero_page_range,
};
@@ -862,7 +846,7 @@ static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n",
__func__, fs->window_kaddr, cache_reg.addr, cache_reg.len);
fs->dax_dev = alloc_dax(fs, NULL, &virtio_fs_dax_ops, 0);
fs->dax_dev = alloc_dax(fs, &virtio_fs_dax_ops);
if (IS_ERR(fs->dax_dev))
return PTR_ERR(fs->dax_dev);

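Taken together, the virtio_fs hunks show the provider side of the slimmed-down dax_operations API. Roughly, with all example_ names hypothetical:

    #include <linux/dax.h>
    #include <linux/err.h>
    #include <linux/pfn_t.h>

    static long example_direct_access(struct dax_device *dax_dev,
                    pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn)
    {
            return -EOPNOTSUPP;     /* placeholder body */
    }

    static int example_zero_page_range(struct dax_device *dax_dev,
                    pgoff_t pgoff, size_t nr_pages)
    {
            return -EOPNOTSUPP;     /* placeholder body */
    }

    /* No ->copy_from_iter()/->copy_to_iter(): the core copies through
     * the kaddr returned by ->direct_access(). */
    static const struct dax_operations example_dax_ops = {
            .direct_access = example_direct_access,
            .zero_page_range = example_zero_page_range,
    };

    static int example_setup(void *private, struct dax_device **out)
    {
            /* alloc_dax() has lost its host-name and flags arguments. */
            struct dax_device *dax_dev = alloc_dax(private, &example_dax_ops);

            if (IS_ERR(dax_dev))
                    return PTR_ERR(dax_dev);
            *out = dax_dev;
            return 0;
    }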

@@ -9,9 +9,9 @@ ccflags-y += -I $(srctree)/$(src) # needed for trace events
obj-$(CONFIG_FS_IOMAP) += iomap.o
iomap-y += trace.o \
buffered-io.o \
iter.o
iomap-$(CONFIG_BLOCK) += buffered-io.o \
direct-io.o \
fiemap.o \
iter.o \
seek.o
iomap-$(CONFIG_SWAP) += swapfile.o


@@ -897,7 +897,6 @@ EXPORT_SYMBOL_GPL(iomap_file_unshare);
static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
{
struct iomap *iomap = &iter->iomap;
const struct iomap *srcmap = iomap_iter_srcmap(iter);
loff_t pos = iter->pos;
loff_t length = iomap_length(iter);
@@ -913,14 +912,6 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
size_t offset;
size_t bytes = min_t(u64, SIZE_MAX, length);
if (IS_DAX(iter->inode)) {
s64 tmp = dax_iomap_zero(pos, bytes, iomap);
if (tmp < 0)
return tmp;
bytes = tmp;
goto good;
}
status = iomap_write_begin(iter, pos, bytes, &folio);
if (status)
return status;
@@ -933,7 +924,6 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
folio_mark_accessed(folio);
bytes = iomap_write_end(iter, pos, bytes, bytes, folio);
good:
if (WARN_ON_ONCE(bytes == 0))
return -EIO;


@@ -4551,7 +4551,7 @@ xfs_bmapi_convert_delalloc(
* the extent. Just return the real extent at this offset.
*/
if (!isnullstartblock(bma.got.br_startblock)) {
xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags);
*seq = READ_ONCE(ifp->if_seq);
goto out_trans_cancel;
}
@@ -4598,7 +4598,7 @@ xfs_bmapi_convert_delalloc(
XFS_STATS_INC(mp, xs_xstrat_quick);
ASSERT(!isnullstartblock(bma.got.br_startblock));
xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags);
*seq = READ_ONCE(ifp->if_seq);
if (whichfork == XFS_COW_FORK)


@@ -359,7 +359,7 @@ retry:
isnullstartblock(imap.br_startblock))
goto allocate_blocks;
xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0);
xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0);
trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap);
return 0;
allocate_blocks:


@@ -1001,7 +1001,7 @@ xfs_free_file_space(
/*
* Now that we've unmap all full blocks we'll have to zero out any
* partial block at the beginning and/or end. iomap_zero_range is smart
* partial block at the beginning and/or end. xfs_zero_range is smart
* enough to skip any holes, including those we just created, but we
* must take care not to zero beyond EOF and enlarge i_size.
*/
@@ -1009,15 +1009,14 @@ xfs_free_file_space(
return 0;
if (offset + len > XFS_ISIZE(ip))
len = XFS_ISIZE(ip) - offset;
error = iomap_zero_range(VFS_I(ip), offset, len, NULL,
&xfs_buffered_write_iomap_ops);
error = xfs_zero_range(ip, offset, len, NULL);
if (error)
return error;
/*
* If we zeroed right up to EOF and EOF straddles a page boundary we
* must make sure that the post-EOF area is also zeroed because the
* page could be mmap'd and iomap_zero_range doesn't do that for us.
* page could be mmap'd and xfs_zero_range doesn't do that for us.
* Writeback of the eof page will do this, albeit clumsily.
*/
if (offset + len >= XFS_ISIZE(ip) && offset_in_page(offset + len) > 0) {


@@ -1892,6 +1892,7 @@ xfs_free_buftarg(
list_lru_destroy(&btp->bt_lru);
blkdev_issue_flush(btp->bt_bdev);
fs_put_dax(btp->bt_daxdev);
kmem_free(btp);
}
@@ -1932,11 +1933,10 @@ xfs_setsize_buftarg_early(
return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev));
}
xfs_buftarg_t *
struct xfs_buftarg *
xfs_alloc_buftarg(
struct xfs_mount *mp,
struct block_device *bdev,
struct dax_device *dax_dev)
struct block_device *bdev)
{
xfs_buftarg_t *btp;
@@ -1945,7 +1945,7 @@ xfs_alloc_buftarg(
btp->bt_mount = mp;
btp->bt_dev = bdev->bd_dev;
btp->bt_bdev = bdev;
btp->bt_daxdev = dax_dev;
btp->bt_daxdev = fs_dax_get_by_bdev(bdev, &btp->bt_dax_part_off);
/*
* Buffer IO error rate limiting. Limit it to no more than 10 messages


@@ -89,6 +89,7 @@ typedef struct xfs_buftarg {
dev_t bt_dev;
struct block_device *bt_bdev;
struct dax_device *bt_daxdev;
u64 bt_dax_part_off;
struct xfs_mount *bt_mount;
unsigned int bt_meta_sectorsize;
size_t bt_meta_sectormask;
@@ -338,8 +339,8 @@ xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
/*
* Handling of buftargs.
*/
extern struct xfs_buftarg *xfs_alloc_buftarg(struct xfs_mount *,
struct block_device *, struct dax_device *);
struct xfs_buftarg *xfs_alloc_buftarg(struct xfs_mount *mp,
struct block_device *bdev);
extern void xfs_free_buftarg(struct xfs_buftarg *);
extern void xfs_buftarg_wait(struct xfs_buftarg *);
extern void xfs_buftarg_drain(struct xfs_buftarg *);


@@ -437,8 +437,7 @@ restart:
}
trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
error = iomap_zero_range(inode, isize, iocb->ki_pos - isize,
NULL, &xfs_buffered_write_iomap_ops);
error = xfs_zero_range(ip, isize, iocb->ki_pos - isize, NULL);
if (error)
return error;
} else


@@ -28,7 +28,6 @@
#include "xfs_dquot.h"
#include "xfs_reflink.h"
#define XFS_ALLOC_ALIGN(mp, off) \
(((off) >> mp->m_allocsize_log) << mp->m_allocsize_log)
@@ -54,7 +53,8 @@ xfs_bmbt_to_iomap(
struct xfs_inode *ip,
struct iomap *iomap,
struct xfs_bmbt_irec *imap,
u16 flags)
unsigned int mapping_flags,
u16 iomap_flags)
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_buftarg *target = xfs_inode_buftarg(ip);
@@ -71,16 +71,22 @@ xfs_bmbt_to_iomap(
iomap->type = IOMAP_DELALLOC;
} else {
iomap->addr = BBTOB(xfs_fsb_to_db(ip, imap->br_startblock));
if (mapping_flags & IOMAP_DAX)
iomap->addr += target->bt_dax_part_off;
if (imap->br_state == XFS_EXT_UNWRITTEN)
iomap->type = IOMAP_UNWRITTEN;
else
iomap->type = IOMAP_MAPPED;
}
iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
iomap->bdev = target->bt_bdev;
iomap->dax_dev = target->bt_daxdev;
iomap->flags = flags;
if (mapping_flags & IOMAP_DAX)
iomap->dax_dev = target->bt_daxdev;
else
iomap->bdev = target->bt_bdev;
iomap->flags = iomap_flags;
if (xfs_ipincount(ip) &&
(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
@@ -188,6 +194,7 @@ xfs_iomap_write_direct(
struct xfs_inode *ip,
xfs_fileoff_t offset_fsb,
xfs_fileoff_t count_fsb,
unsigned int flags,
struct xfs_bmbt_irec *imap)
{
struct xfs_mount *mp = ip->i_mount;
@@ -229,7 +236,7 @@ xfs_iomap_write_direct(
* the reserve block pool for bmbt block allocation if there is no space
* left but we need to do unwritten extent conversion.
*/
if (IS_DAX(VFS_I(ip))) {
if (flags & IOMAP_DAX) {
bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
if (imap->br_state == XFS_EXT_UNWRITTEN) {
force = true;
@@ -620,7 +627,7 @@ imap_needs_alloc(
imap->br_startblock == DELAYSTARTBLOCK)
return true;
/* we convert unwritten extents before copying the data for DAX */
if (IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN)
if ((flags & IOMAP_DAX) && imap->br_state == XFS_EXT_UNWRITTEN)
return true;
return false;
}
@@ -800,7 +807,7 @@ xfs_direct_write_iomap_begin(
xfs_iunlock(ip, lockmode);
trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags);
return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags);
allocate_blocks:
error = -EAGAIN;
@@ -826,23 +833,24 @@ allocate_blocks:
xfs_iunlock(ip, lockmode);
error = xfs_iomap_write_direct(ip, offset_fsb, end_fsb - offset_fsb,
&imap);
flags, &imap);
if (error)
return error;
trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap);
return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags | IOMAP_F_NEW);
return xfs_bmbt_to_iomap(ip, iomap, &imap, flags,
iomap_flags | IOMAP_F_NEW);
out_found_cow:
xfs_iunlock(ip, lockmode);
length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap);
if (imap.br_startblock != HOLESTARTBLOCK) {
error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0);
error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0);
if (error)
return error;
}
return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED);
out_unlock:
if (lockmode)
@@ -1052,23 +1060,24 @@ retry:
*/
xfs_iunlock(ip, XFS_ILOCK_EXCL);
trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap);
return xfs_bmbt_to_iomap(ip, iomap, &imap, IOMAP_F_NEW);
return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW);
found_imap:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
found_cow:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
if (imap.br_startoff <= offset_fsb) {
error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0);
error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0);
if (error)
return error;
return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
IOMAP_F_SHARED);
}
xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb);
return xfs_bmbt_to_iomap(ip, iomap, &cmap, 0);
return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0);
out_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -1177,7 +1186,8 @@ xfs_read_iomap_begin(
if (error)
return error;
trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
return xfs_bmbt_to_iomap(ip, iomap, &imap, shared ? IOMAP_F_SHARED : 0);
return xfs_bmbt_to_iomap(ip, iomap, &imap, flags,
shared ? IOMAP_F_SHARED : 0);
}
const struct iomap_ops xfs_read_iomap_ops = {
@@ -1236,7 +1246,8 @@ xfs_seek_iomap_begin(
if (data_fsb < cow_fsb + cmap.br_blockcount)
end_fsb = min(end_fsb, data_fsb);
xfs_trim_extent(&cmap, offset_fsb, end_fsb);
error = xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
error = xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
IOMAP_F_SHARED);
/*
* This is a COW extent, so we must probe the page cache
* because there could be dirty page cache being backed
@@ -1258,7 +1269,7 @@ xfs_seek_iomap_begin(
imap.br_state = XFS_EXT_NORM;
done:
xfs_trim_extent(&imap, offset_fsb, end_fsb);
error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
out_unlock:
xfs_iunlock(ip, lockmode);
return error;
@@ -1305,9 +1316,40 @@ out_unlock:
if (error)
return error;
ASSERT(nimaps);
return xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
}
const struct iomap_ops xfs_xattr_iomap_ops = {
.iomap_begin = xfs_xattr_iomap_begin,
};
int
xfs_zero_range(
struct xfs_inode *ip,
loff_t pos,
loff_t len,
bool *did_zero)
{
struct inode *inode = VFS_I(ip);
if (IS_DAX(inode))
return dax_zero_range(inode, pos, len, did_zero,
&xfs_direct_write_iomap_ops);
return iomap_zero_range(inode, pos, len, did_zero,
&xfs_buffered_write_iomap_ops);
}
int
xfs_truncate_page(
struct xfs_inode *ip,
loff_t pos,
bool *did_zero)
{
struct inode *inode = VFS_I(ip);
if (IS_DAX(inode))
return dax_truncate_page(inode, pos, did_zero,
&xfs_direct_write_iomap_ops);
return iomap_truncate_page(inode, pos, did_zero,
&xfs_buffered_write_iomap_ops);
}


@@ -12,13 +12,19 @@ struct xfs_inode;
struct xfs_bmbt_irec;
int xfs_iomap_write_direct(struct xfs_inode *ip, xfs_fileoff_t offset_fsb,
xfs_fileoff_t count_fsb, struct xfs_bmbt_irec *imap);
xfs_fileoff_t count_fsb, unsigned int flags,
struct xfs_bmbt_irec *imap);
int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
xfs_fileoff_t xfs_iomap_eof_align_last_fsb(struct xfs_inode *ip,
xfs_fileoff_t end_fsb);
int xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
struct xfs_bmbt_irec *, u16);
int xfs_bmbt_to_iomap(struct xfs_inode *ip, struct iomap *iomap,
struct xfs_bmbt_irec *imap, unsigned int mapping_flags,
u16 iomap_flags);
int xfs_zero_range(struct xfs_inode *ip, loff_t pos, loff_t len,
bool *did_zero);
int xfs_truncate_page(struct xfs_inode *ip, loff_t pos, bool *did_zero);
static inline xfs_filblks_t
xfs_aligned_fsb_count(


@@ -890,8 +890,8 @@ xfs_setattr_size(
*/
if (newsize > oldsize) {
trace_xfs_zero_eof(ip, oldsize, newsize - oldsize);
error = iomap_zero_range(inode, oldsize, newsize - oldsize,
&did_zeroing, &xfs_buffered_write_iomap_ops);
error = xfs_zero_range(ip, oldsize, newsize - oldsize,
&did_zeroing);
} else {
/*
* iomap won't detect a dirty page over an unwritten block (or a
@@ -903,8 +903,7 @@
newsize);
if (error)
return error;
error = iomap_truncate_page(inode, newsize, &did_zeroing,
&xfs_buffered_write_iomap_ops);
error = xfs_truncate_page(ip, newsize, &did_zeroing);
}
if (error)


@@ -155,7 +155,7 @@ xfs_fs_map_blocks(
xfs_iunlock(ip, lock_flags);
error = xfs_iomap_write_direct(ip, offset_fsb,
end_fsb - offset_fsb, &imap);
end_fsb - offset_fsb, 0, &imap);
if (error)
goto out_unlock;
@@ -173,7 +173,7 @@
}
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, 0);
*device_generation = mp->m_generation;
return error;
out_unlock:


@@ -1272,8 +1272,7 @@ xfs_reflink_zero_posteof(
return 0;
trace_xfs_zero_eof(ip, isize, pos - isize);
return iomap_zero_range(VFS_I(ip), isize, pos - isize, NULL,
&xfs_buffered_write_iomap_ops);
return xfs_zero_range(ip, isize, pos - isize, NULL);
}
/*


@@ -331,13 +331,34 @@ xfs_set_inode_alloc(
return xfs_is_inode32(mp) ? maxagi : agcount;
}
static bool
xfs_buftarg_is_dax(
struct super_block *sb,
struct xfs_buftarg *bt)
static int
xfs_setup_dax_always(
struct xfs_mount *mp)
{
return dax_supported(bt->bt_daxdev, bt->bt_bdev, sb->s_blocksize, 0,
bdev_nr_sectors(bt->bt_bdev));
if (!mp->m_ddev_targp->bt_daxdev &&
(!mp->m_rtdev_targp || !mp->m_rtdev_targp->bt_daxdev)) {
xfs_alert(mp,
"DAX unsupported by block device. Turning off DAX.");
goto disable_dax;
}
if (mp->m_super->s_blocksize != PAGE_SIZE) {
xfs_alert(mp,
"DAX not supported for blocksize. Turning off DAX.");
goto disable_dax;
}
if (xfs_has_reflink(mp)) {
xfs_alert(mp, "DAX and reflink cannot be used together!");
return -EINVAL;
}
xfs_warn(mp, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
return 0;
disable_dax:
xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
return 0;
}
STATIC int
@@ -370,26 +391,19 @@ STATIC void
xfs_close_devices(
struct xfs_mount *mp)
{
struct dax_device *dax_ddev = mp->m_ddev_targp->bt_daxdev;
if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
struct dax_device *dax_logdev = mp->m_logdev_targp->bt_daxdev;
xfs_free_buftarg(mp->m_logdev_targp);
xfs_blkdev_put(logdev);
fs_put_dax(dax_logdev);
}
if (mp->m_rtdev_targp) {
struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
struct dax_device *dax_rtdev = mp->m_rtdev_targp->bt_daxdev;
xfs_free_buftarg(mp->m_rtdev_targp);
xfs_blkdev_put(rtdev);
fs_put_dax(dax_rtdev);
}
xfs_free_buftarg(mp->m_ddev_targp);
fs_put_dax(dax_ddev);
}
/*
@@ -407,8 +421,6 @@ xfs_open_devices(
struct xfs_mount *mp)
{
struct block_device *ddev = mp->m_super->s_bdev;
struct dax_device *dax_ddev = fs_dax_get_by_bdev(ddev);
struct dax_device *dax_logdev = NULL, *dax_rtdev = NULL;
struct block_device *logdev = NULL, *rtdev = NULL;
int error;
@@ -418,8 +430,7 @@
if (mp->m_logname) {
error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
if (error)
goto out;
dax_logdev = fs_dax_get_by_bdev(logdev);
return error;
}
if (mp->m_rtname) {
@@ -433,25 +444,24 @@
error = -EINVAL;
goto out_close_rtdev;
}
dax_rtdev = fs_dax_get_by_bdev(rtdev);
}
/*
* Setup xfs_mount buffer target pointers
*/
error = -ENOMEM;
mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, dax_ddev);
mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev);
if (!mp->m_ddev_targp)
goto out_close_rtdev;
if (rtdev) {
mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, dax_rtdev);
mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev);
if (!mp->m_rtdev_targp)
goto out_free_ddev_targ;
}
if (logdev && logdev != ddev) {
mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, dax_logdev);
mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev);
if (!mp->m_logdev_targp)
goto out_free_rtdev_targ;
} else {
@@ -467,14 +477,9 @@
xfs_free_buftarg(mp->m_ddev_targp);
out_close_rtdev:
xfs_blkdev_put(rtdev);
fs_put_dax(dax_rtdev);
out_close_logdev:
if (logdev && logdev != ddev) {
if (logdev && logdev != ddev)
xfs_blkdev_put(logdev);
fs_put_dax(dax_logdev);
}
out:
fs_put_dax(dax_ddev);
return error;
}
@@ -1593,26 +1598,9 @@ xfs_fs_fill_super(
sb->s_flags |= SB_I_VERSION;
if (xfs_has_dax_always(mp)) {
bool rtdev_is_dax = false, datadev_is_dax;
xfs_warn(mp,
"DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
datadev_is_dax = xfs_buftarg_is_dax(sb, mp->m_ddev_targp);
if (mp->m_rtdev_targp)
rtdev_is_dax = xfs_buftarg_is_dax(sb,
mp->m_rtdev_targp);
if (!rtdev_is_dax && !datadev_is_dax) {
xfs_alert(mp,
"DAX unsupported by block device. Turning off DAX.");
xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
}
if (xfs_has_reflink(mp)) {
xfs_alert(mp,
"DAX and reflink cannot be used together!");
error = -EINVAL;
error = xfs_setup_dax_always(mp);
if (error)
goto out_filestream_unmount;
}
}
if (xfs_has_discard(mp)) {


@@ -6,14 +6,14 @@
#include <linux/mm.h>
#include <linux/radix-tree.h>
/* Flag for synchronous flush */
#define DAXDEV_F_SYNC (1UL << 0)
typedef unsigned long dax_entry_t;
struct iomap_ops;
struct iomap;
struct dax_device;
struct gendisk;
struct iomap_ops;
struct iomap_iter;
struct iomap;
struct dax_operations {
/*
* direct_access: translate a device-relative
@@ -28,33 +28,18 @@ struct dax_operations {
*/
bool (*dax_supported)(struct dax_device *, struct block_device *, int,
sector_t, sector_t);
/* copy_from_iter: required operation for fs-dax direct-i/o */
size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t,
struct iov_iter *);
/* copy_to_iter: required operation for fs-dax direct-i/o */
size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t,
struct iov_iter *);
/* zero_page_range: required operation. Zero page range */
int (*zero_page_range)(struct dax_device *, pgoff_t, size_t);
};
#if IS_ENABLED(CONFIG_DAX)
struct dax_device *alloc_dax(void *private, const char *host,
const struct dax_operations *ops, unsigned long flags);
struct dax_device *alloc_dax(void *private, const struct dax_operations *ops);
void put_dax(struct dax_device *dax_dev);
void kill_dax(struct dax_device *dax_dev);
void dax_write_cache(struct dax_device *dax_dev, bool wc);
bool dax_write_cache_enabled(struct dax_device *dax_dev);
bool __dax_synchronous(struct dax_device *dax_dev);
static inline bool dax_synchronous(struct dax_device *dax_dev)
{
return __dax_synchronous(dax_dev);
}
void __set_dax_synchronous(struct dax_device *dax_dev);
static inline void set_dax_synchronous(struct dax_device *dax_dev)
{
__set_dax_synchronous(dax_dev);
}
bool dax_synchronous(struct dax_device *dax_dev);
void set_dax_synchronous(struct dax_device *dax_dev);
/*
* Check if given mapping is supported by the file / underlying device.
*/
@@ -68,8 +53,8 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
return dax_synchronous(dax_dev);
}
#else
static inline struct dax_device *alloc_dax(void *private, const char *host,
const struct dax_operations *ops, unsigned long flags)
static inline struct dax_device *alloc_dax(void *private,
const struct dax_operations *ops)
{
/*
* Callers should check IS_ENABLED(CONFIG_DAX) to know if this
@@ -104,22 +89,38 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
}
#endif
void set_dax_nocache(struct dax_device *dax_dev);
void set_dax_nomc(struct dax_device *dax_dev);
struct writeback_control;
int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
#if IS_ENABLED(CONFIG_FS_DAX)
bool generic_fsdax_supported(struct dax_device *dax_dev,
struct block_device *bdev, int blocksize, sector_t start,
sector_t sectors);
bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
int blocksize, sector_t start, sector_t len);
#if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX)
int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk);
void dax_remove_host(struct gendisk *disk);
struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev,
u64 *start_off);
static inline void fs_put_dax(struct dax_device *dax_dev)
{
put_dax(dax_dev);
}
#else
static inline int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk)
{
return 0;
}
static inline void dax_remove_host(struct gendisk *disk)
{
}
static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev,
u64 *start_off)
{
return NULL;
}
static inline void fs_put_dax(struct dax_device *dax_dev)
{
}
#endif /* CONFIG_BLOCK && CONFIG_FS_DAX */
struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev);
#if IS_ENABLED(CONFIG_FS_DAX)
int dax_writeback_mapping_range(struct address_space *mapping,
struct dax_device *dax_dev, struct writeback_control *wbc);
@@ -128,24 +129,6 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t st
dax_entry_t dax_lock_page(struct page *page);
void dax_unlock_page(struct page *page, dax_entry_t cookie);
#else
#define generic_fsdax_supported NULL
static inline bool dax_supported(struct dax_device *dax_dev,
struct block_device *bdev, int blocksize, sector_t start,
sector_t len)
{
return false;
}
static inline void fs_put_dax(struct dax_device *dax_dev)
{
}
static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
{
return NULL;
}
static inline struct page *dax_layout_busy_page(struct address_space *mapping)
{
return NULL;
@@ -174,6 +157,11 @@ static inline void dax_unlock_page(struct page *page, dax_entry_t cookie)
}
#endif
int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
const struct iomap_ops *ops);
int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
const struct iomap_ops *ops);
#if IS_ENABLED(CONFIG_DAX)
int dax_read_lock(void);
void dax_read_unlock(int id);
@@ -208,7 +196,6 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
pgoff_t index);
s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap);
static inline bool dax_mapping(struct address_space *mapping)
{
return mapping->host && IS_DAX(mapping->host);


@@ -147,8 +147,6 @@ typedef int (*dm_busy_fn) (struct dm_target *ti);
*/
typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff,
long nr_pages, void **kaddr, pfn_t *pfn);
typedef size_t (*dm_dax_copy_iter_fn)(struct dm_target *ti, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i);
typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff,
size_t nr_pages);
@@ -200,8 +198,6 @@ struct target_type {
dm_iterate_devices_fn iterate_devices;
dm_io_hints_fn io_hints;
dm_dax_direct_access_fn direct_access;
dm_dax_copy_iter_fn dax_copy_from_iter;
dm_dax_copy_iter_fn dax_copy_to_iter;
dm_dax_zero_page_range_fn dax_zero_page_range;
/* For internal device-mapper use. */


@@ -141,6 +141,11 @@ struct iomap_page_ops {
#define IOMAP_NOWAIT (1 << 5) /* do not block */
#define IOMAP_OVERWRITE_ONLY (1 << 6) /* only pure overwrites allowed */
#define IOMAP_UNSHARE (1 << 7) /* unshare_file_range */
#ifdef CONFIG_FS_DAX
#define IOMAP_DAX (1 << 8) /* DAX mapping */
#else
#define IOMAP_DAX 0
#endif /* CONFIG_FS_DAX */
struct iomap_ops {
/*


@@ -72,16 +72,6 @@ struct dev_pagemap_ops {
*/
void (*page_free)(struct page *page);
/*
* Transition the refcount in struct dev_pagemap to the dead state.
*/
void (*kill)(struct dev_pagemap *pgmap);
/*
* Wait for refcount in struct dev_pagemap to be idle and reap it.
*/
void (*cleanup)(struct dev_pagemap *pgmap);
/*
* Used for private (un-addressable) device memory only. Must migrate
* the page back to a CPU accessible page.
@@ -95,8 +85,7 @@ struct dev_pagemap_ops {
* struct dev_pagemap - metadata for ZONE_DEVICE mappings
* @altmap: pre-allocated/reserved memory for vmemmap allocations
* @ref: reference count that pins the devm_memremap_pages() mapping
* @internal_ref: internal reference if @ref is not provided by the caller
* @done: completion for @internal_ref
* @done: completion for @ref
* @type: memory type: see MEMORY_* in memory_hotplug.h
* @flags: PGMAP_* flags to specify detailed behavior
* @ops: method table
@@ -109,8 +98,7 @@ struct dev_pagemap_ops {
*/
struct dev_pagemap {
struct vmem_altmap altmap;
struct percpu_ref *ref;
struct percpu_ref internal_ref;
struct percpu_ref ref;
struct completion done;
enum memory_type type;
unsigned int flags;
@@ -191,7 +179,7 @@ static inline unsigned long memremap_compat_align(void)
static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
{
if (pgmap)
percpu_ref_put(pgmap->ref);
percpu_ref_put(&pgmap->ref);
}
#endif /* _LINUX_MEMREMAP_H_ */

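What the memremap rework leaves a ZONE_DEVICE user with, sketched under the assumption of a simple one-range device (the example_ names and resource handling are placeholders):

    #include <linux/device.h>
    #include <linux/err.h>
    #include <linux/memremap.h>
    #include <linux/slab.h>

    static int example_attach(struct device *dev, u64 start, u64 end)
    {
            struct dev_pagemap *pgmap;
            void *addr;

            pgmap = devm_kzalloc(dev, sizeof(*pgmap), GFP_KERNEL);
            if (!pgmap)
                    return -ENOMEM;

            pgmap->type = MEMORY_DEVICE_GENERIC;
            pgmap->range.start = start;
            pgmap->range.end = end;
            pgmap->nr_range = 1;
            /* Nothing to wire up for teardown: memremap_pages() initializes
             * the embedded pgmap->ref, and memunmap_pages() kills it, waits
             * on pgmap->done and exits the ref. */
            addr = devm_memremap_pages(dev, pgmap);
            return PTR_ERR_OR_ZERO(addr);
    }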

@@ -203,7 +203,7 @@ bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/*
* Note, users like pmem that depend on the stricter semantics of
* copy_from_iter_flushcache() than copy_from_iter_nocache() must check for
* _copy_from_iter_flushcache() than _copy_from_iter_nocache() must check for
* IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) before assuming that the
* destination is flushed from the cache on return.
*/
@@ -218,24 +218,6 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
#define _copy_mc_to_iter _copy_to_iter
#endif
static __always_inline __must_check
size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
{
if (unlikely(!check_copy_size(addr, bytes, false)))
return 0;
else
return _copy_from_iter_flushcache(addr, bytes, i);
}
static __always_inline __must_check
size_t copy_mc_to_iter(void *addr, size_t bytes, struct iov_iter *i)
{
if (unlikely(!check_copy_size(addr, bytes, true)))
return 0;
else
return _copy_mc_to_iter(addr, bytes, i);
}
size_t iov_iter_zero(size_t bytes, struct iov_iter *);
unsigned long iov_iter_alignment(const struct iov_iter *i);
unsigned long iov_iter_gap_alignment(const struct iov_iter *i);

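With the checked wrappers gone, the remaining users (the DAX copy paths) call the underscored variants directly on a kernel address, where check_copy_size() had nothing to verify. A sketch with a hypothetical caller:

    #include <linux/uio.h>

    /* On architectures without ARCH_HAS_UACCESS_FLUSHCACHE this resolves
     * to _copy_from_iter_nocache() via the fallback #define in uio.h. */
    static size_t example_pmem_copy_from_iter(void *kaddr, size_t bytes,
                                              struct iov_iter *i)
    {
            return _copy_from_iter_flushcache(kaddr, bytes, i);
    }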

@@ -112,30 +112,6 @@ static unsigned long pfn_next(unsigned long pfn)
#define for_each_device_pfn(pfn, map, i) \
for (pfn = pfn_first(map, i); pfn < pfn_end(map, i); pfn = pfn_next(pfn))
static void dev_pagemap_kill(struct dev_pagemap *pgmap)
{
if (pgmap->ops && pgmap->ops->kill)
pgmap->ops->kill(pgmap);
else
percpu_ref_kill(pgmap->ref);
}
static void dev_pagemap_cleanup(struct dev_pagemap *pgmap)
{
if (pgmap->ops && pgmap->ops->cleanup) {
pgmap->ops->cleanup(pgmap);
} else {
wait_for_completion(&pgmap->done);
percpu_ref_exit(pgmap->ref);
}
/*
* Undo the pgmap ref assignment for the internal case as the
* caller may re-enable the same pgmap.
*/
if (pgmap->ref == &pgmap->internal_ref)
pgmap->ref = NULL;
}
static void pageunmap_range(struct dev_pagemap *pgmap, int range_id)
{
struct range *range = &pgmap->ranges[range_id];
@@ -167,11 +143,12 @@ void memunmap_pages(struct dev_pagemap *pgmap)
unsigned long pfn;
int i;
dev_pagemap_kill(pgmap);
percpu_ref_kill(&pgmap->ref);
for (i = 0; i < pgmap->nr_range; i++)
for_each_device_pfn(pfn, pgmap, i)
put_page(pfn_to_page(pfn));
dev_pagemap_cleanup(pgmap);
wait_for_completion(&pgmap->done);
percpu_ref_exit(&pgmap->ref);
for (i = 0; i < pgmap->nr_range; i++)
pageunmap_range(pgmap, i);
@@ -188,8 +165,7 @@ static void devm_memremap_pages_release(void *data)
static void dev_pagemap_percpu_release(struct percpu_ref *ref)
{
struct dev_pagemap *pgmap =
container_of(ref, struct dev_pagemap, internal_ref);
struct dev_pagemap *pgmap = container_of(ref, struct dev_pagemap, ref);
complete(&pgmap->done);
}
@@ -295,8 +271,8 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
PHYS_PFN(range->start),
PHYS_PFN(range_len(range)), pgmap);
percpu_ref_get_many(pgmap->ref, pfn_end(pgmap, range_id)
- pfn_first(pgmap, range_id));
percpu_ref_get_many(&pgmap->ref,
pfn_end(pgmap, range_id) - pfn_first(pgmap, range_id));
return 0;
err_add_memory:
@@ -362,22 +338,11 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
break;
}
if (!pgmap->ref) {
if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup))
return ERR_PTR(-EINVAL);
init_completion(&pgmap->done);
error = percpu_ref_init(&pgmap->internal_ref,
dev_pagemap_percpu_release, 0, GFP_KERNEL);
if (error)
return ERR_PTR(error);
pgmap->ref = &pgmap->internal_ref;
} else {
if (!pgmap->ops || !pgmap->ops->kill || !pgmap->ops->cleanup) {
WARN(1, "Missing reference count teardown definition\n");
return ERR_PTR(-EINVAL);
}
}
init_completion(&pgmap->done);
error = percpu_ref_init(&pgmap->ref, dev_pagemap_percpu_release, 0,
GFP_KERNEL);
if (error)
return ERR_PTR(error);
devmap_managed_enable_get(pgmap);
@@ -486,7 +451,7 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
/* fall back to slow path lookup */
rcu_read_lock();
pgmap = xa_load(&pgmap_array, PHYS_PFN(phys));
if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
if (pgmap && !percpu_ref_tryget_live(&pgmap->ref))
pgmap = NULL;
rcu_read_unlock();


@@ -35,8 +35,6 @@ obj-$(CONFIG_DAX) += dax.o
endif
obj-$(CONFIG_DEV_DAX) += device_dax.o
obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem_core.o
obj-$(CONFIG_DEV_DAX_PMEM_COMPAT) += dax_pmem_compat.o
nfit-y := $(ACPI_SRC)/core.o
nfit-y += $(ACPI_SRC)/intel.o
@@ -67,12 +65,8 @@ device_dax-y += dax-dev.o
device_dax-y += device_dax_test.o
device_dax-y += config_check.o
dax_pmem-y := $(DAX_SRC)/pmem/pmem.o
dax_pmem-y := $(DAX_SRC)/pmem.o
dax_pmem-y += dax_pmem_test.o
dax_pmem_core-y := $(DAX_SRC)/pmem/core.o
dax_pmem_core-y += dax_pmem_core_test.o
dax_pmem_compat-y := $(DAX_SRC)/pmem/compat.o
dax_pmem_compat-y += dax_pmem_compat_test.o
dax_pmem-y += config_check.o
libnvdimm-y := $(NVDIMM_SRC)/core.o


@@ -1,8 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright(c) 2019 Intel Corporation. All rights reserved.
#include <linux/module.h>
#include <linux/printk.h>
#include "watermark.h"
nfit_test_watermark(dax_pmem_compat);


@@ -1,8 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright(c) 2019 Intel Corporation. All rights reserved.
#include <linux/module.h>
#include <linux/printk.h>
#include "watermark.h"
nfit_test_watermark(dax_pmem_core);


@@ -100,25 +100,17 @@ static void nfit_test_kill(void *_pgmap)
{
struct dev_pagemap *pgmap = _pgmap;
WARN_ON(!pgmap || !pgmap->ref);
WARN_ON(!pgmap);
if (pgmap->ops && pgmap->ops->kill)
pgmap->ops->kill(pgmap);
else
percpu_ref_kill(pgmap->ref);
percpu_ref_kill(&pgmap->ref);
if (pgmap->ops && pgmap->ops->cleanup) {
pgmap->ops->cleanup(pgmap);
} else {
wait_for_completion(&pgmap->done);
percpu_ref_exit(pgmap->ref);
}
wait_for_completion(&pgmap->done);
percpu_ref_exit(&pgmap->ref);
}
static void dev_pagemap_percpu_release(struct percpu_ref *ref)
{
struct dev_pagemap *pgmap =
container_of(ref, struct dev_pagemap, internal_ref);
struct dev_pagemap *pgmap = container_of(ref, struct dev_pagemap, ref);
complete(&pgmap->done);
}
@@ -132,22 +124,11 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
if (!nfit_res)
return devm_memremap_pages(dev, pgmap);
if (!pgmap->ref) {
if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup))
return ERR_PTR(-EINVAL);
init_completion(&pgmap->done);
error = percpu_ref_init(&pgmap->internal_ref,
dev_pagemap_percpu_release, 0, GFP_KERNEL);
if (error)
return ERR_PTR(error);
pgmap->ref = &pgmap->internal_ref;
} else {
if (!pgmap->ops || !pgmap->ops->kill || !pgmap->ops->cleanup) {
WARN(1, "Missing reference count teardown definition\n");
return ERR_PTR(-EINVAL);
}
}
init_completion(&pgmap->done);
error = percpu_ref_init(&pgmap->ref, dev_pagemap_percpu_release, 0,
GFP_KERNEL);
if (error)
return ERR_PTR(error);
error = devm_add_action_or_reset(dev, nfit_test_kill, pgmap);
if (error)


@@ -1054,10 +1054,6 @@ static __init int ndtest_init(void)
libnvdimm_test();
device_dax_test();
dax_pmem_test();
dax_pmem_core_test();
#ifdef CONFIG_DEV_DAX_PMEM_COMPAT
dax_pmem_compat_test();
#endif
nfit_test_setup(ndtest_resource_lookup, NULL);


@@ -3300,10 +3300,6 @@ static __init int nfit_test_init(void)
acpi_nfit_test();
device_dax_test();
dax_pmem_test();
dax_pmem_core_test();
#ifdef CONFIG_DEV_DAX_PMEM_COMPAT
dax_pmem_compat_test();
#endif
nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm);