Merge tag 'libnvdimm-for-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams:
"The bulk of this has appeared in -next and independently received a
build success notification from the kbuild robot. The
'for-4.5/block-dax' topic branch was rebased over the weekend to drop the "block
device end-of-life" rework that Al would like to see re-implemented
with a notifier, and to address bug reports against the badblocks
integration.
There is pending feedback against "libnvdimm: Add a poison list and
export badblocks" received last week. Linda identified some localized
fixups that we will handle incrementally.
Summary:
- Media error handling: The 'badblocks' implementation that
originated in md-raid is up-levelled to a generic capability of a
block device. This initial implementation is limited to being
consulted in the pmem block-i/o path. Later, 'badblocks' will be
consulted when creating dax mappings.
- Raw block device dax: For virtualization and other cases that want
large contiguous mappings of persistent memory, add the capability
to dax-mmap a block device directly.
- Increased /dev/mem restrictions: Add an option to treat all
io-memory as IORESOURCE_EXCLUSIVE, i.e. disable /dev/mem access
while a driver is actively using an address range. This behavior
is controlled via the new CONFIG_IO_STRICT_DEVMEM option and can be
overridden by the existing "iomem=relaxed" kernel command line
option.
- Miscellaneous fixes include a 'pfn'-device huge page alignment fix,
block device shutdown crash fix, and other small libnvdimm fixes"
* tag 'libnvdimm-for-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (32 commits)
block: kill disk_{check|set|clear|alloc}_badblocks
libnvdimm, pmem: nvdimm_read_bytes() badblocks support
pmem, dax: disable dax in the presence of bad blocks
pmem: fail io-requests to known bad blocks
libnvdimm: convert to statically allocated badblocks
libnvdimm: don't fail init for full badblocks list
block, badblocks: introduce devm_init_badblocks
block: clarify badblocks lifetime
badblocks: rename badblocks_free to badblocks_exit
libnvdimm, pmem: move definition of nvdimm_namespace_add_poison to nd.h
libnvdimm: Add a poison list and export badblocks
nfit_test: Enable DSMs for all test NFITs
md: convert to use the generic badblocks code
block: Add badblock management for gendisks
badblocks: Add core badblock management code
block: fix del_gendisk() vs blkdev_ioctl crash
block: enable dax for raw block devices
block: introduce bdev_file_inode()
restrict /dev/mem to idle io memory ranges
arch: consolidate CONFIG_STRICT_DEVM in lib/Kconfig.debug
...
This commit is contained in:
122
fs/block_dev.c
122
fs/block_dev.c
@@ -156,11 +156,16 @@ blkdev_get_block(struct inode *inode, sector_t iblock,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * bdev_file_inode - return the inode that hosts a file's page-cache mapping.
 * @file: open file to look up
 *
 * Returns file->f_mapping->host. The other hunks of this patch switch
 * open-coded "file->f_mapping->host" lookups over to this helper.
 */
static struct inode *bdev_file_inode(struct file *file)
|
||||
{
|
||||
return file->f_mapping->host;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *inode = file->f_mapping->host;
|
||||
struct inode *inode = bdev_file_inode(file);
|
||||
|
||||
if (IS_DAX(inode))
|
||||
return dax_do_io(iocb, inode, iter, offset, blkdev_get_block,
|
||||
@@ -338,7 +343,7 @@ static int blkdev_write_end(struct file *file, struct address_space *mapping,
|
||||
*/
|
||||
static loff_t block_llseek(struct file *file, loff_t offset, int whence)
|
||||
{
|
||||
struct inode *bd_inode = file->f_mapping->host;
|
||||
struct inode *bd_inode = bdev_file_inode(file);
|
||||
loff_t retval;
|
||||
|
||||
mutex_lock(&bd_inode->i_mutex);
|
||||
@@ -349,7 +354,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int whence)
|
||||
|
||||
int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
|
||||
{
|
||||
struct inode *bd_inode = filp->f_mapping->host;
|
||||
struct inode *bd_inode = bdev_file_inode(filp);
|
||||
struct block_device *bdev = I_BDEV(bd_inode);
|
||||
int error;
|
||||
|
||||
@@ -1224,8 +1229,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
|
||||
}
|
||||
}
|
||||
|
||||
if (!ret)
|
||||
if (!ret) {
|
||||
bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
|
||||
if (!blkdev_dax_capable(bdev))
|
||||
bdev->bd_inode->i_flags &= ~S_DAX;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the device is invalidated, rescan partition
|
||||
@@ -1239,6 +1247,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
|
||||
else if (ret == -ENOMEDIUM)
|
||||
invalidate_partitions(disk, bdev);
|
||||
}
|
||||
|
||||
if (ret)
|
||||
goto out_clear;
|
||||
} else {
|
||||
@@ -1259,12 +1268,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
|
||||
goto out_clear;
|
||||
}
|
||||
bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
|
||||
/*
|
||||
* If the partition is not aligned on a page
|
||||
* boundary, we can't do dax I/O to it.
|
||||
*/
|
||||
if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) ||
|
||||
(bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
|
||||
if (!blkdev_dax_capable(bdev))
|
||||
bdev->bd_inode->i_flags &= ~S_DAX;
|
||||
}
|
||||
} else {
|
||||
@@ -1599,14 +1603,14 @@ EXPORT_SYMBOL(blkdev_put);
|
||||
|
||||
static int blkdev_close(struct inode * inode, struct file * filp)
|
||||
{
|
||||
struct block_device *bdev = I_BDEV(filp->f_mapping->host);
|
||||
struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
|
||||
blkdev_put(bdev, filp->f_mode);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
|
||||
{
|
||||
struct block_device *bdev = I_BDEV(file->f_mapping->host);
|
||||
struct block_device *bdev = I_BDEV(bdev_file_inode(file));
|
||||
fmode_t mode = file->f_mode;
|
||||
|
||||
/*
|
||||
@@ -1631,7 +1635,7 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
|
||||
ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *bd_inode = file->f_mapping->host;
|
||||
struct inode *bd_inode = bdev_file_inode(file);
|
||||
loff_t size = i_size_read(bd_inode);
|
||||
struct blk_plug plug;
|
||||
ssize_t ret;
|
||||
@@ -1663,7 +1667,7 @@ EXPORT_SYMBOL_GPL(blkdev_write_iter);
|
||||
ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *bd_inode = file->f_mapping->host;
|
||||
struct inode *bd_inode = bdev_file_inode(file);
|
||||
loff_t size = i_size_read(bd_inode);
|
||||
loff_t pos = iocb->ki_pos;
|
||||
|
||||
@@ -1702,13 +1706,101 @@ static const struct address_space_operations def_blk_aops = {
|
||||
.is_dirty_writeback = buffer_check_dirty_writeback,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_FS_DAX
|
||||
/*
|
||||
* In the raw block case we do not need to contend with truncation nor
|
||||
* unwritten file extents. Without those concerns there is no need for
|
||||
* additional locking beyond the mmap_sem context that these routines
|
||||
* are already executing under.
|
||||
*
|
||||
* Note, there is no protection if the block device is dynamically
|
||||
* resized (partition grow/shrink) during a fault. A stable block device
|
||||
* size is already not enforced in the blkdev_direct_IO path.
|
||||
*
|
||||
* For DAX, it is the responsibility of the block device driver to
|
||||
* ensure the whole-disk device size is stable while requests are in
|
||||
* flight.
|
||||
*
|
||||
* Finally, unlike the filemap_page_mkwrite() case there is no
|
||||
* filesystem superblock to sync against freezing. We still include a
|
||||
* pfn_mkwrite callback for dax drivers to receive write fault
|
||||
* notifications.
|
||||
*/
|
||||
/*
 * blkdev_dax_fault - fault handler used for DAX mappings of a block device.
 * @vma: the mapping that faulted
 * @vmf: fault description, passed through unchanged
 *
 * Thin forwarder: hands the fault to __dax_fault() with blkdev_get_block as
 * the block-lookup callback and NULL as the final argument, and returns
 * whatever __dax_fault() returns.
 * NOTE(review): the meaning of the trailing NULL argument is defined by
 * __dax_fault(), which is not visible here - confirm against its prototype.
 */
static int blkdev_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
{
|
||||
return __dax_fault(vma, vmf, blkdev_get_block, NULL);
|
||||
}
|
||||
|
||||
/*
 * blkdev_dax_pmd_fault - PMD-level counterpart of blkdev_dax_fault().
 * @vma:   the mapping that faulted
 * @addr:  faulting address, passed through unchanged
 * @pmd:   PMD entry being faulted, passed through unchanged
 * @flags: fault flags, passed through unchanged
 *
 * Thin forwarder: calls __dax_pmd_fault() with blkdev_get_block as the
 * block-lookup callback and NULL as the final argument, and returns its
 * result.
 * NOTE(review): the meaning of the trailing NULL argument is defined by
 * __dax_pmd_fault(), which is not visible here - confirm.
 */
static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
|
||||
pmd_t *pmd, unsigned int flags)
|
||||
{
|
||||
return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
|
||||
}
|
||||
|
||||
/*
 * blkdev_vm_open - vm_operations .open callback for block-device mappings.
 * @vma: the mapping being opened; its vm_file identifies the block device
 *
 * Increments bdev->bd_map_count while holding the block-device inode's
 * i_mutex. blkdev_vm_close() performs the matching decrement under the same
 * mutex.
 * NOTE(review): the consumers of bd_map_count are not visible in this
 * excerpt - presumably it tracks live mappings of the device; confirm.
 */
static void blkdev_vm_open(struct vm_area_struct *vma)
|
||||
{
|
||||
struct inode *bd_inode = bdev_file_inode(vma->vm_file);
|
||||
struct block_device *bdev = I_BDEV(bd_inode);
|
||||
|
||||
mutex_lock(&bd_inode->i_mutex);
|
||||
bdev->bd_map_count++;
|
||||
mutex_unlock(&bd_inode->i_mutex);
|
||||
}
|
||||
|
||||
/*
 * blkdev_vm_close - vm_operations .close callback for block-device mappings.
 * @vma: the mapping being closed; its vm_file identifies the block device
 *
 * Decrements bdev->bd_map_count while holding the block-device inode's
 * i_mutex, undoing an increment made by blkdev_mmap() or blkdev_vm_open().
 * No underflow check is performed here.
 */
static void blkdev_vm_close(struct vm_area_struct *vma)
|
||||
{
|
||||
struct inode *bd_inode = bdev_file_inode(vma->vm_file);
|
||||
struct block_device *bdev = I_BDEV(bd_inode);
|
||||
|
||||
mutex_lock(&bd_inode->i_mutex);
|
||||
bdev->bd_map_count--;
|
||||
mutex_unlock(&bd_inode->i_mutex);
|
||||
}
|
||||
|
||||
/*
 * VM operations that blkdev_mmap() installs when the block-device inode is
 * IS_DAX. .open/.close maintain bd_map_count; faults go to the DAX handlers.
 * .pfn_mkwrite deliberately reuses blkdev_dax_fault, so write faults on an
 * already-mapped pfn take the same path as ordinary faults.
 */
static const struct vm_operations_struct blkdev_dax_vm_ops = {
|
||||
.open = blkdev_vm_open,
|
||||
.close = blkdev_vm_close,
|
||||
.fault = blkdev_dax_fault,
|
||||
.pmd_fault = blkdev_dax_pmd_fault,
|
||||
.pfn_mkwrite = blkdev_dax_fault,
|
||||
};
|
||||
|
||||
/*
 * VM operations that blkdev_mmap() installs when the block-device inode is
 * not IS_DAX. Faults use the generic filemap handlers; .open/.close are the
 * same bd_map_count accounting callbacks used by the DAX table.
 * NOTE(review): unlike the DAX table, no write-notification callback
 * (page_mkwrite/pfn_mkwrite) is set here - confirm this is intended.
 */
static const struct vm_operations_struct blkdev_default_vm_ops = {
|
||||
.open = blkdev_vm_open,
|
||||
.close = blkdev_vm_close,
|
||||
.fault = filemap_fault,
|
||||
.map_pages = filemap_map_pages,
|
||||
};
|
||||
|
||||
/*
 * blkdev_mmap - mmap handler for block-device files (CONFIG_FS_DAX builds).
 * @file: the block-device file being mapped
 * @vma:  the new mapping to set up
 *
 * Calls file_accessed() on the file, then, under the block-device inode's
 * i_mutex, counts the new mapping in bdev->bd_map_count and selects the
 * vm_ops table: the DAX table (plus VM_MIXEDMAP | VM_HUGEPAGE in vm_flags)
 * when the inode is IS_DAX, otherwise the default filemap-based table.
 *
 * Always returns 0. When CONFIG_FS_DAX is not set, blkdev_mmap is instead
 * defined as generic_file_mmap.
 */
static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
{
|
||||
struct inode *bd_inode = bdev_file_inode(file);
|
||||
struct block_device *bdev = I_BDEV(bd_inode);
|
||||
|
||||
file_accessed(file);
|
||||
mutex_lock(&bd_inode->i_mutex);
|
||||
/* count this mapping; the installed .open/.close callbacks adjust the same counter */
bdev->bd_map_count++;
|
||||
if (IS_DAX(bd_inode)) {
|
||||
vma->vm_ops = &blkdev_dax_vm_ops;
|
||||
vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
|
||||
} else {
|
||||
vma->vm_ops = &blkdev_default_vm_ops;
|
||||
}
|
||||
mutex_unlock(&bd_inode->i_mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
#define blkdev_mmap generic_file_mmap
|
||||
#endif
|
||||
|
||||
const struct file_operations def_blk_fops = {
|
||||
.open = blkdev_open,
|
||||
.release = blkdev_close,
|
||||
.llseek = block_llseek,
|
||||
.read_iter = blkdev_read_iter,
|
||||
.write_iter = blkdev_write_iter,
|
||||
.mmap = generic_file_mmap,
|
||||
.mmap = blkdev_mmap,
|
||||
.fsync = blkdev_fsync,
|
||||
.unlocked_ioctl = block_ioctl,
|
||||
#ifdef CONFIG_COMPAT
|
||||
|
||||
Reference in New Issue
Block a user