From 3b0fe47805802216087259b07de691ef47ff6fbc Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 4 Jan 2016 16:22:45 +1100 Subject: [PATCH 1/2] xfs: Don't use reserved blocks for data blocks with DAX Commit 1ca1915 ("xfs: Don't use unwritten extents for DAX") enabled the DAX allocation call to dip into the reserve pool in case it was converting unwritten extents rather than allocating blocks. This was a direct copy of the unwritten extent conversion code, but had an unintended side effect of allowing normal data block allocation to use the reserve pool. Hence normal block allocation could deplete the reserve pool and prevent unwritten extent conversion at ENOSPC, hence violating fallocate guarantees on preallocated space. Fix it by checking whether the incoming map from __xfs_get_blocks() spans an unwritten extent and only use the reserve pool if the allocation covers an unwritten extent. Signed-off-by: Dave Chinner Tested-by: Ross Zwisler Signed-off-by: Dave Chinner --- fs/xfs/xfs_iomap.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index f4f5b43cf647..9ed146b96856 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -203,15 +203,20 @@ xfs_iomap_write_direct( * this outside the transaction context, but if we commit and then crash * we may not have zeroed the blocks and this will be exposed on * recovery of the allocation. Hence we must zero before commit. + * * Further, if we are mapping unwritten extents here, we need to zero * and convert them to written so that we don't need an unwritten extent * callback for DAX. This also means that we need to be able to dip into - * the reserve block pool if there is no space left but we need to do - * unwritten extent conversion. + * the reserve block pool for bmbt block allocation if there is no space + * left but we need to do unwritten extent conversion. */ + if (IS_DAX(VFS_I(ip))) { bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO; - tp->t_flags |= XFS_TRANS_RESERVE; + if (ISUNWRITTEN(imap)) { + tp->t_flags |= XFS_TRANS_RESERVE; + resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1; + } } error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, resrtextents); From a6d7636e8d0fd94fd1937db91d5b06a91fa85dde Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 4 Jan 2016 16:28:25 +1100 Subject: [PATCH 2/2] xfs: fix recursive splice read locking with DAX Doing a splice read (generic/249) generates a lockdep splat because we recursively lock the inode iolock in this path: SyS_sendfile64 do_sendfile do_splice_direct splice_direct_to_actor do_splice_to xfs_file_splice_read <<<<<< lock here default_file_splice_read vfs_readv do_readv_writev do_iter_readv_writev xfs_file_read_iter <<<<<< then here The issue here is that for DAX inodes we need to avoid the page cache path and hence simply push it into the normal read path. Unfortunately, we can't tell down at xfs_file_read_iter() whether we are being called from the splice path and hence we cannot avoid the locking at this layer. Hence we simply have to drop the inode locking at the higher splice layer for DAX. Signed-off-by: Dave Chinner Tested-by: Ross Zwisler Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index f5392ab2def1..ebe9b8290a70 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -402,19 +402,26 @@ xfs_file_splice_read( if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; - xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); - trace_xfs_file_splice_read(ip, count, *ppos, ioflags); - /* for dax, we need to avoid the page cache */ - if (IS_DAX(VFS_I(ip))) - ret = default_file_splice_read(infilp, ppos, pipe, count, flags); - else - ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); + /* + * DAX inodes cannot ues the page cache for splice, so we have to push + * them through the VFS IO path. This means it goes through + * ->read_iter, which for us takes the XFS_IOLOCK_SHARED. Hence we + * cannot lock the splice operation at this level for DAX inodes. + */ + if (IS_DAX(VFS_I(ip))) { + ret = default_file_splice_read(infilp, ppos, pipe, count, + flags); + goto out; + } + + xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); + ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); + xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); +out: if (ret > 0) XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret); - - xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); return ret; }