mirror of
https://github.com/torvalds/linux.git
synced 2024-12-30 14:52:05 +00:00
e58ac1770d
Add a new enum and a xfs_dir2_format helper that returns it to allow the code to switch on the format of a directory in a single operation and switch all helpers of xfs_dir2_isblock and xfs_dir2_isleaf to it. This also removes the explicit xfs_iread_extents call in a few of the call sites given that xfs_bmap_last_offset already takes care of it underneath. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: "Darrick J. Wong" <djwong@kernel.org> Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
1236 lines
34 KiB
C
1236 lines
34 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/*
|
|
* Copyright (c) 2020-2024 Oracle. All Rights Reserved.
|
|
* Author: Darrick J. Wong <djwong@kernel.org>
|
|
*/
|
|
#include "xfs.h"
|
|
#include "xfs_fs.h"
|
|
#include "xfs_shared.h"
|
|
#include "xfs_format.h"
|
|
#include "xfs_log_format.h"
|
|
#include "xfs_trans_resv.h"
|
|
#include "xfs_mount.h"
|
|
#include "xfs_defer.h"
|
|
#include "xfs_inode.h"
|
|
#include "xfs_trans.h"
|
|
#include "xfs_bmap.h"
|
|
#include "xfs_icache.h"
|
|
#include "xfs_quota.h"
|
|
#include "xfs_exchmaps.h"
|
|
#include "xfs_trace.h"
|
|
#include "xfs_bmap_btree.h"
|
|
#include "xfs_trans_space.h"
|
|
#include "xfs_error.h"
|
|
#include "xfs_errortag.h"
|
|
#include "xfs_health.h"
|
|
#include "xfs_exchmaps_item.h"
|
|
#include "xfs_da_format.h"
|
|
#include "xfs_da_btree.h"
|
|
#include "xfs_attr_leaf.h"
|
|
#include "xfs_attr.h"
|
|
#include "xfs_dir2_priv.h"
|
|
#include "xfs_dir2.h"
|
|
#include "xfs_symlink_remote.h"
|
|
|
|
/* Slab cache backing the incore struct xfs_exchmaps_intent allocations. */
struct kmem_cache *xfs_exchmaps_intent_cache;
|
|
|
|
/* bmbt mappings adjacent to a pair of records. */
|
|
struct xfs_exchmaps_adjacent {
|
|
struct xfs_bmbt_irec left1;
|
|
struct xfs_bmbt_irec right1;
|
|
struct xfs_bmbt_irec left2;
|
|
struct xfs_bmbt_irec right2;
|
|
};
|
|
|
|
/* Initializer that starts all four neighbour records out as holes. */
#define ADJACENT_INIT { \
	.left1	= { .br_startblock = HOLESTARTBLOCK }, \
	.left2	= { .br_startblock = HOLESTARTBLOCK }, \
	.right1	= { .br_startblock = HOLESTARTBLOCK }, \
	.right2	= { .br_startblock = HOLESTARTBLOCK }, \
}
|
|
|
|
/* Information to reset reflink flag / CoW fork state after an exchange. */
|
|
|
|
/*
|
|
* If the reflink flag is set on either inode, make sure it has an incore CoW
|
|
* fork, since all reflink inodes must have them. If there's a CoW fork and it
|
|
* has mappings in it, make sure the inodes are tagged appropriately so that
|
|
* speculative preallocations can be GC'd if we run low of space.
|
|
*/
|
|
static inline void
|
|
xfs_exchmaps_ensure_cowfork(
|
|
struct xfs_inode *ip)
|
|
{
|
|
struct xfs_ifork *cfork;
|
|
|
|
if (xfs_is_reflink_inode(ip))
|
|
xfs_ifork_init_cow(ip);
|
|
|
|
cfork = xfs_ifork_ptr(ip, XFS_COW_FORK);
|
|
if (!cfork)
|
|
return;
|
|
if (cfork->if_bytes > 0)
|
|
xfs_inode_set_cowblocks_tag(ip);
|
|
else
|
|
xfs_inode_clear_cowblocks_tag(ip);
|
|
}
|
|
|
|
/*
|
|
* Adjust the on-disk inode size upwards if needed so that we never add
|
|
* mappings into the file past EOF. This is crucial so that log recovery won't
|
|
* get confused by the sudden appearance of post-eof mappings.
|
|
*/
|
|
STATIC void
|
|
xfs_exchmaps_update_size(
|
|
struct xfs_trans *tp,
|
|
struct xfs_inode *ip,
|
|
struct xfs_bmbt_irec *imap,
|
|
xfs_fsize_t new_isize)
|
|
{
|
|
struct xfs_mount *mp = tp->t_mountp;
|
|
xfs_fsize_t len;
|
|
|
|
if (new_isize < 0)
|
|
return;
|
|
|
|
len = min(XFS_FSB_TO_B(mp, imap->br_startoff + imap->br_blockcount),
|
|
new_isize);
|
|
|
|
if (len <= ip->i_disk_size)
|
|
return;
|
|
|
|
trace_xfs_exchmaps_update_inode_size(ip, len);
|
|
|
|
ip->i_disk_size = len;
|
|
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
|
|
}
|
|
|
|
/* Advance the incore state tracking after exchanging a mapping. */
|
|
static inline void
|
|
xmi_advance(
|
|
struct xfs_exchmaps_intent *xmi,
|
|
const struct xfs_bmbt_irec *irec)
|
|
{
|
|
xmi->xmi_startoff1 += irec->br_blockcount;
|
|
xmi->xmi_startoff2 += irec->br_blockcount;
|
|
xmi->xmi_blockcount -= irec->br_blockcount;
|
|
}
|
|
|
|
/* Do we still have more mappings to exchange? */
|
|
static inline bool
|
|
xmi_has_more_exchange_work(const struct xfs_exchmaps_intent *xmi)
|
|
{
|
|
return xmi->xmi_blockcount > 0;
|
|
}
|
|
|
|
/* Do we have post-operation cleanups to perform? */
|
|
static inline bool
|
|
xmi_has_postop_work(const struct xfs_exchmaps_intent *xmi)
|
|
{
|
|
return xmi->xmi_flags & (XFS_EXCHMAPS_CLEAR_INO1_REFLINK |
|
|
XFS_EXCHMAPS_CLEAR_INO2_REFLINK |
|
|
__XFS_EXCHMAPS_INO2_SHORTFORM);
|
|
}
|
|
|
|
/* Check all mappings to make sure we can actually exchange them. */
|
|
int
|
|
xfs_exchmaps_check_forks(
|
|
struct xfs_mount *mp,
|
|
const struct xfs_exchmaps_req *req)
|
|
{
|
|
struct xfs_ifork *ifp1, *ifp2;
|
|
int whichfork = xfs_exchmaps_reqfork(req);
|
|
|
|
/* No fork? */
|
|
ifp1 = xfs_ifork_ptr(req->ip1, whichfork);
|
|
ifp2 = xfs_ifork_ptr(req->ip2, whichfork);
|
|
if (!ifp1 || !ifp2)
|
|
return -EINVAL;
|
|
|
|
/* We don't know how to exchange local format forks. */
|
|
if (ifp1->if_format == XFS_DINODE_FMT_LOCAL ||
|
|
ifp2->if_format == XFS_DINODE_FMT_LOCAL)
|
|
return -EINVAL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
#ifdef CONFIG_XFS_QUOTA
/*
 * Log the quota accounting changes caused by exchanging @irec1 (currently
 * owned by file 1) with @irec2 (currently owned by file 2).  Only real
 * extents change the block counts: a real extent leaving one file is
 * subtracted from that file and added to the other.
 *
 * The quota field (realtime or regular block count) is chosen from file 1's
 * realtime status and used for both inodes.
 */
static inline void
xfs_exchmaps_update_quota(
	struct xfs_trans		*tp,
	struct xfs_exchmaps_intent	*xmi,
	struct xfs_bmbt_irec		*irec1,
	struct xfs_bmbt_irec		*irec2)
{
	int64_t				delta1 = 0;
	int64_t				delta2 = 0;
	unsigned int			field;

	if (XFS_IS_REALTIME_INODE(xmi->xmi_ip1))
		field = XFS_TRANS_DQ_RTBCOUNT;
	else
		field = XFS_TRANS_DQ_BCOUNT;

	/* Blocks moving from file 1 to file 2. */
	if (xfs_bmap_is_real_extent(irec1)) {
		delta1 -= irec1->br_blockcount;
		delta2 += irec1->br_blockcount;
	}

	/* Blocks moving from file 2 to file 1. */
	if (xfs_bmap_is_real_extent(irec2)) {
		delta1 += irec2->br_blockcount;
		delta2 -= irec2->br_blockcount;
	}

	xfs_trans_mod_dquot_byino(tp, xmi->xmi_ip1, field, delta1);
	xfs_trans_mod_dquot_byino(tp, xmi->xmi_ip2, field, delta2);
}
#else
/* Quota support is compiled out; there is nothing to account. */
# define xfs_exchmaps_update_quota(tp, xmi, irec1, irec2)	((void)0)
#endif
|
|
|
|
/* Decide if we want to skip this mapping from file1. */
|
|
static inline bool
|
|
xfs_exchmaps_can_skip_mapping(
|
|
struct xfs_exchmaps_intent *xmi,
|
|
struct xfs_bmbt_irec *irec)
|
|
{
|
|
struct xfs_mount *mp = xmi->xmi_ip1->i_mount;
|
|
|
|
/* Do not skip this mapping if the caller did not tell us to. */
|
|
if (!(xmi->xmi_flags & XFS_EXCHMAPS_INO1_WRITTEN))
|
|
return false;
|
|
|
|
/* Do not skip mapped, written mappings. */
|
|
if (xfs_bmap_is_written_extent(irec))
|
|
return false;
|
|
|
|
/*
|
|
* The mapping is unwritten or a hole. It cannot be a delalloc
|
|
* reservation because we already excluded those. It cannot be an
|
|
* unwritten extent with dirty page cache because we flushed the page
|
|
* cache. For files where the allocation unit is 1FSB (files on the
|
|
* data dev, rt files if the extent size is 1FSB), we can safely
|
|
* skip this mapping.
|
|
*/
|
|
if (!xfs_inode_has_bigrtalloc(xmi->xmi_ip1))
|
|
return true;
|
|
|
|
/*
|
|
* For a realtime file with a multi-fsb allocation unit, the decision
|
|
* is trickier because we can only swap full allocation units.
|
|
* Unwritten mappings can appear in the middle of an rtx if the rtx is
|
|
* partially written, but they can also appear for preallocations.
|
|
*
|
|
* If the mapping is a hole, skip it entirely. Holes should align with
|
|
* rtx boundaries.
|
|
*/
|
|
if (!xfs_bmap_is_real_extent(irec))
|
|
return true;
|
|
|
|
/*
|
|
* All mappings below this point are unwritten.
|
|
*
|
|
* - If the beginning is not aligned to an rtx, trim the end of the
|
|
* mapping so that it does not cross an rtx boundary, and swap it.
|
|
*
|
|
* - If both ends are aligned to an rtx, skip the entire mapping.
|
|
*/
|
|
if (!isaligned_64(irec->br_startoff, mp->m_sb.sb_rextsize)) {
|
|
xfs_fileoff_t new_end;
|
|
|
|
new_end = roundup_64(irec->br_startoff, mp->m_sb.sb_rextsize);
|
|
irec->br_blockcount = min(irec->br_blockcount,
|
|
new_end - irec->br_startoff);
|
|
return false;
|
|
}
|
|
if (isaligned_64(irec->br_blockcount, mp->m_sb.sb_rextsize))
|
|
return true;
|
|
|
|
/*
|
|
* All mappings below this point are unwritten, start on an rtx
|
|
* boundary, and do not end on an rtx boundary.
|
|
*
|
|
* - If the mapping is longer than one rtx, trim the end of the mapping
|
|
* down to an rtx boundary and skip it.
|
|
*
|
|
* - The mapping is shorter than one rtx. Swap it.
|
|
*/
|
|
if (irec->br_blockcount > mp->m_sb.sb_rextsize) {
|
|
xfs_fileoff_t new_end;
|
|
|
|
new_end = rounddown_64(irec->br_startoff + irec->br_blockcount,
|
|
mp->m_sb.sb_rextsize);
|
|
irec->br_blockcount = new_end - irec->br_startoff;
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Walk forward through the file ranges in @xmi until we find two different
|
|
* mappings to exchange. If there is work to do, return the mappings;
|
|
* otherwise we've reached the end of the range and xmi_blockcount will be
|
|
* zero.
|
|
*
|
|
* If the walk skips over a pair of mappings to the same storage, save them as
|
|
* the left records in @adj (if provided) so that the simulation phase can
|
|
* avoid an extra lookup.
|
|
*/
|
|
static int
|
|
xfs_exchmaps_find_mappings(
|
|
struct xfs_exchmaps_intent *xmi,
|
|
struct xfs_bmbt_irec *irec1,
|
|
struct xfs_bmbt_irec *irec2,
|
|
struct xfs_exchmaps_adjacent *adj)
|
|
{
|
|
int nimaps;
|
|
int bmap_flags;
|
|
int error;
|
|
|
|
bmap_flags = xfs_bmapi_aflag(xfs_exchmaps_whichfork(xmi));
|
|
|
|
for (; xmi_has_more_exchange_work(xmi); xmi_advance(xmi, irec1)) {
|
|
/* Read mapping from the first file */
|
|
nimaps = 1;
|
|
error = xfs_bmapi_read(xmi->xmi_ip1, xmi->xmi_startoff1,
|
|
xmi->xmi_blockcount, irec1, &nimaps,
|
|
bmap_flags);
|
|
if (error)
|
|
return error;
|
|
if (nimaps != 1 ||
|
|
irec1->br_startblock == DELAYSTARTBLOCK ||
|
|
irec1->br_startoff != xmi->xmi_startoff1) {
|
|
/*
|
|
* We should never get no mapping or a delalloc mapping
|
|
* or something that doesn't match what we asked for,
|
|
* since the caller flushed both inodes and we hold the
|
|
* ILOCKs for both inodes.
|
|
*/
|
|
ASSERT(0);
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (xfs_exchmaps_can_skip_mapping(xmi, irec1)) {
|
|
trace_xfs_exchmaps_mapping1_skip(xmi->xmi_ip1, irec1);
|
|
continue;
|
|
}
|
|
|
|
/* Read mapping from the second file */
|
|
nimaps = 1;
|
|
error = xfs_bmapi_read(xmi->xmi_ip2, xmi->xmi_startoff2,
|
|
irec1->br_blockcount, irec2, &nimaps,
|
|
bmap_flags);
|
|
if (error)
|
|
return error;
|
|
if (nimaps != 1 ||
|
|
irec2->br_startblock == DELAYSTARTBLOCK ||
|
|
irec2->br_startoff != xmi->xmi_startoff2) {
|
|
/*
|
|
* We should never get no mapping or a delalloc mapping
|
|
* or something that doesn't match what we asked for,
|
|
* since the caller flushed both inodes and we hold the
|
|
* ILOCKs for both inodes.
|
|
*/
|
|
ASSERT(0);
|
|
return -EINVAL;
|
|
}
|
|
|
|
/*
|
|
* We can only exchange as many blocks as the smaller of the
|
|
* two mapping maps.
|
|
*/
|
|
irec1->br_blockcount = min(irec1->br_blockcount,
|
|
irec2->br_blockcount);
|
|
|
|
trace_xfs_exchmaps_mapping1(xmi->xmi_ip1, irec1);
|
|
trace_xfs_exchmaps_mapping2(xmi->xmi_ip2, irec2);
|
|
|
|
/* We found something to exchange, so return it. */
|
|
if (irec1->br_startblock != irec2->br_startblock)
|
|
return 0;
|
|
|
|
/*
|
|
* Two mappings pointing to the same physical block must not
|
|
* have different states; that's filesystem corruption. Move
|
|
* on to the next mapping if they're both holes or both point
|
|
* to the same physical space extent.
|
|
*/
|
|
if (irec1->br_state != irec2->br_state) {
|
|
xfs_bmap_mark_sick(xmi->xmi_ip1,
|
|
xfs_exchmaps_whichfork(xmi));
|
|
xfs_bmap_mark_sick(xmi->xmi_ip2,
|
|
xfs_exchmaps_whichfork(xmi));
|
|
return -EFSCORRUPTED;
|
|
}
|
|
|
|
/*
|
|
* Save the mappings if we're estimating work and skipping
|
|
* these identical mappings.
|
|
*/
|
|
if (adj) {
|
|
memcpy(&adj->left1, irec1, sizeof(*irec1));
|
|
memcpy(&adj->left2, irec2, sizeof(*irec2));
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Exchange these two mappings. */
|
|
static void
|
|
xfs_exchmaps_one_step(
|
|
struct xfs_trans *tp,
|
|
struct xfs_exchmaps_intent *xmi,
|
|
struct xfs_bmbt_irec *irec1,
|
|
struct xfs_bmbt_irec *irec2)
|
|
{
|
|
int whichfork = xfs_exchmaps_whichfork(xmi);
|
|
|
|
xfs_exchmaps_update_quota(tp, xmi, irec1, irec2);
|
|
|
|
/* Remove both mappings. */
|
|
xfs_bmap_unmap_extent(tp, xmi->xmi_ip1, whichfork, irec1);
|
|
xfs_bmap_unmap_extent(tp, xmi->xmi_ip2, whichfork, irec2);
|
|
|
|
/*
|
|
* Re-add both mappings. We exchange the file offsets between the two
|
|
* maps and add the opposite map, which has the effect of filling the
|
|
* logical offsets we just unmapped, but with with the physical mapping
|
|
* information exchanged.
|
|
*/
|
|
swap(irec1->br_startoff, irec2->br_startoff);
|
|
xfs_bmap_map_extent(tp, xmi->xmi_ip1, whichfork, irec2);
|
|
xfs_bmap_map_extent(tp, xmi->xmi_ip2, whichfork, irec1);
|
|
|
|
/* Make sure we're not adding mappings past EOF. */
|
|
if (whichfork == XFS_DATA_FORK) {
|
|
xfs_exchmaps_update_size(tp, xmi->xmi_ip1, irec2,
|
|
xmi->xmi_isize1);
|
|
xfs_exchmaps_update_size(tp, xmi->xmi_ip2, irec1,
|
|
xmi->xmi_isize2);
|
|
}
|
|
|
|
/*
|
|
* Advance our cursor and exit. The caller (either defer ops or log
|
|
* recovery) will log the XMD item, and if *blockcount is nonzero, it
|
|
* will log a new XMI item for the remainder and call us back.
|
|
*/
|
|
xmi_advance(xmi, irec1);
|
|
}
|
|
|
|
/* Convert inode2's leaf attr fork back to shortform, if possible.. */
|
|
STATIC int
|
|
xfs_exchmaps_attr_to_sf(
|
|
struct xfs_trans *tp,
|
|
struct xfs_exchmaps_intent *xmi)
|
|
{
|
|
struct xfs_da_args args = {
|
|
.dp = xmi->xmi_ip2,
|
|
.geo = tp->t_mountp->m_attr_geo,
|
|
.whichfork = XFS_ATTR_FORK,
|
|
.trans = tp,
|
|
.owner = xmi->xmi_ip2->i_ino,
|
|
};
|
|
struct xfs_buf *bp;
|
|
int forkoff;
|
|
int error;
|
|
|
|
if (!xfs_attr_is_leaf(xmi->xmi_ip2))
|
|
return 0;
|
|
|
|
error = xfs_attr3_leaf_read(tp, xmi->xmi_ip2, xmi->xmi_ip2->i_ino, 0,
|
|
&bp);
|
|
if (error)
|
|
return error;
|
|
|
|
forkoff = xfs_attr_shortform_allfit(bp, xmi->xmi_ip2);
|
|
if (forkoff == 0)
|
|
return 0;
|
|
|
|
return xfs_attr3_leaf_to_shortform(bp, &args, forkoff);
|
|
}
|
|
|
|
/* Convert inode2's block dir fork back to shortform, if possible.. */
|
|
STATIC int
|
|
xfs_exchmaps_dir_to_sf(
|
|
struct xfs_trans *tp,
|
|
struct xfs_exchmaps_intent *xmi)
|
|
{
|
|
struct xfs_da_args args = {
|
|
.dp = xmi->xmi_ip2,
|
|
.geo = tp->t_mountp->m_dir_geo,
|
|
.whichfork = XFS_DATA_FORK,
|
|
.trans = tp,
|
|
.owner = xmi->xmi_ip2->i_ino,
|
|
};
|
|
struct xfs_dir2_sf_hdr sfh;
|
|
struct xfs_buf *bp;
|
|
int size;
|
|
int error = 0;
|
|
|
|
if (xfs_dir2_format(&args, &error) != XFS_DIR2_FMT_BLOCK)
|
|
return error;
|
|
|
|
error = xfs_dir3_block_read(tp, xmi->xmi_ip2, xmi->xmi_ip2->i_ino, &bp);
|
|
if (error)
|
|
return error;
|
|
|
|
size = xfs_dir2_block_sfsize(xmi->xmi_ip2, bp->b_addr, &sfh);
|
|
if (size > xfs_inode_data_fork_size(xmi->xmi_ip2))
|
|
return 0;
|
|
|
|
return xfs_dir2_block_to_sf(&args, bp, size, &sfh);
|
|
}
|
|
|
|
/* Convert inode2's remote symlink target back to shortform, if possible. */
|
|
STATIC int
|
|
xfs_exchmaps_link_to_sf(
|
|
struct xfs_trans *tp,
|
|
struct xfs_exchmaps_intent *xmi)
|
|
{
|
|
struct xfs_inode *ip = xmi->xmi_ip2;
|
|
struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
|
|
char *buf;
|
|
int error;
|
|
|
|
if (ifp->if_format == XFS_DINODE_FMT_LOCAL ||
|
|
ip->i_disk_size > xfs_inode_data_fork_size(ip))
|
|
return 0;
|
|
|
|
/* Read the current symlink target into a buffer. */
|
|
buf = kmalloc(ip->i_disk_size + 1,
|
|
GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
|
|
if (!buf) {
|
|
ASSERT(0);
|
|
return -ENOMEM;
|
|
}
|
|
|
|
error = xfs_symlink_remote_read(ip, buf);
|
|
if (error)
|
|
goto free;
|
|
|
|
/* Remove the blocks. */
|
|
error = xfs_symlink_remote_truncate(tp, ip);
|
|
if (error)
|
|
goto free;
|
|
|
|
/* Convert fork to local format and log our changes. */
|
|
xfs_idestroy_fork(ifp);
|
|
ifp->if_bytes = 0;
|
|
ifp->if_format = XFS_DINODE_FMT_LOCAL;
|
|
xfs_init_local_fork(ip, XFS_DATA_FORK, buf, ip->i_disk_size);
|
|
xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
|
|
free:
|
|
kfree(buf);
|
|
return error;
|
|
}
|
|
|
|
/* Clear the reflink flag after an exchange. */
|
|
static inline void
|
|
xfs_exchmaps_clear_reflink(
|
|
struct xfs_trans *tp,
|
|
struct xfs_inode *ip)
|
|
{
|
|
trace_xfs_reflink_unset_inode_flag(ip);
|
|
|
|
ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
|
|
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
|
|
}
|
|
|
|
/* Finish whatever work might come after an exchange operation. */
|
|
static int
|
|
xfs_exchmaps_do_postop_work(
|
|
struct xfs_trans *tp,
|
|
struct xfs_exchmaps_intent *xmi)
|
|
{
|
|
if (xmi->xmi_flags & __XFS_EXCHMAPS_INO2_SHORTFORM) {
|
|
int error = 0;
|
|
|
|
if (xmi->xmi_flags & XFS_EXCHMAPS_ATTR_FORK)
|
|
error = xfs_exchmaps_attr_to_sf(tp, xmi);
|
|
else if (S_ISDIR(VFS_I(xmi->xmi_ip2)->i_mode))
|
|
error = xfs_exchmaps_dir_to_sf(tp, xmi);
|
|
else if (S_ISLNK(VFS_I(xmi->xmi_ip2)->i_mode))
|
|
error = xfs_exchmaps_link_to_sf(tp, xmi);
|
|
xmi->xmi_flags &= ~__XFS_EXCHMAPS_INO2_SHORTFORM;
|
|
if (error)
|
|
return error;
|
|
}
|
|
|
|
if (xmi->xmi_flags & XFS_EXCHMAPS_CLEAR_INO1_REFLINK) {
|
|
xfs_exchmaps_clear_reflink(tp, xmi->xmi_ip1);
|
|
xmi->xmi_flags &= ~XFS_EXCHMAPS_CLEAR_INO1_REFLINK;
|
|
}
|
|
|
|
if (xmi->xmi_flags & XFS_EXCHMAPS_CLEAR_INO2_REFLINK) {
|
|
xfs_exchmaps_clear_reflink(tp, xmi->xmi_ip2);
|
|
xmi->xmi_flags &= ~XFS_EXCHMAPS_CLEAR_INO2_REFLINK;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Finish one step in a mapping exchange operation, possibly relogging. */
|
|
int
|
|
xfs_exchmaps_finish_one(
|
|
struct xfs_trans *tp,
|
|
struct xfs_exchmaps_intent *xmi)
|
|
{
|
|
struct xfs_bmbt_irec irec1, irec2;
|
|
int error;
|
|
|
|
if (xmi_has_more_exchange_work(xmi)) {
|
|
/*
|
|
* If the operation state says that some range of the files
|
|
* have not yet been exchanged, look for mappings in that range
|
|
* to exchange. If we find some mappings, exchange them.
|
|
*/
|
|
error = xfs_exchmaps_find_mappings(xmi, &irec1, &irec2, NULL);
|
|
if (error)
|
|
return error;
|
|
|
|
if (xmi_has_more_exchange_work(xmi))
|
|
xfs_exchmaps_one_step(tp, xmi, &irec1, &irec2);
|
|
|
|
/*
|
|
* If the caller asked us to exchange the file sizes after the
|
|
* exchange and either we just exchanged the last mappings in
|
|
* the range or we didn't find anything to exchange, update the
|
|
* ondisk file sizes.
|
|
*/
|
|
if ((xmi->xmi_flags & XFS_EXCHMAPS_SET_SIZES) &&
|
|
!xmi_has_more_exchange_work(xmi)) {
|
|
xmi->xmi_ip1->i_disk_size = xmi->xmi_isize1;
|
|
xmi->xmi_ip2->i_disk_size = xmi->xmi_isize2;
|
|
|
|
xfs_trans_log_inode(tp, xmi->xmi_ip1, XFS_ILOG_CORE);
|
|
xfs_trans_log_inode(tp, xmi->xmi_ip2, XFS_ILOG_CORE);
|
|
}
|
|
} else if (xmi_has_postop_work(xmi)) {
|
|
/*
|
|
* Now that we're finished with the exchange operation,
|
|
* complete the post-op cleanup work.
|
|
*/
|
|
error = xfs_exchmaps_do_postop_work(tp, xmi);
|
|
if (error)
|
|
return error;
|
|
}
|
|
|
|
if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_EXCHMAPS_FINISH_ONE))
|
|
return -EIO;
|
|
|
|
/* If we still have work to do, ask for a new transaction. */
|
|
if (xmi_has_more_exchange_work(xmi) || xmi_has_postop_work(xmi)) {
|
|
trace_xfs_exchmaps_defer(tp->t_mountp, xmi);
|
|
return -EAGAIN;
|
|
}
|
|
|
|
/*
|
|
* If we reach here, we've finished all the exchange work and the post
|
|
* operation work. The last thing we need to do before returning to
|
|
* the caller is to make sure that COW forks are set up correctly.
|
|
*/
|
|
if (!(xmi->xmi_flags & XFS_EXCHMAPS_ATTR_FORK)) {
|
|
xfs_exchmaps_ensure_cowfork(xmi->xmi_ip1);
|
|
xfs_exchmaps_ensure_cowfork(xmi->xmi_ip2);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Compute the amount of bmbt blocks we should reserve for each file. In the
|
|
* worst case, each exchange will fill a hole with a new mapping, which could
|
|
* result in a btree split every time we add a new leaf block.
|
|
*/
|
|
static inline uint64_t
|
|
xfs_exchmaps_bmbt_blocks(
|
|
struct xfs_mount *mp,
|
|
const struct xfs_exchmaps_req *req)
|
|
{
|
|
return howmany_64(req->nr_exchanges,
|
|
XFS_MAX_CONTIG_BMAPS_PER_BLOCK(mp)) *
|
|
XFS_EXTENTADD_SPACE_RES(mp, xfs_exchmaps_reqfork(req));
|
|
}
|
|
|
|
/* Compute the space we should reserve for the rmap btree expansions. */
|
|
static inline uint64_t
|
|
xfs_exchmaps_rmapbt_blocks(
|
|
struct xfs_mount *mp,
|
|
const struct xfs_exchmaps_req *req)
|
|
{
|
|
if (!xfs_has_rmapbt(mp))
|
|
return 0;
|
|
if (XFS_IS_REALTIME_INODE(req->ip1))
|
|
return 0;
|
|
|
|
return howmany_64(req->nr_exchanges,
|
|
XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) *
|
|
XFS_RMAPADD_SPACE_RES(mp);
|
|
}
|
|
|
|
/* Estimate the bmbt and rmapbt overhead required to exchange mappings. */
|
|
int
|
|
xfs_exchmaps_estimate_overhead(
|
|
struct xfs_exchmaps_req *req)
|
|
{
|
|
struct xfs_mount *mp = req->ip1->i_mount;
|
|
xfs_filblks_t bmbt_blocks;
|
|
xfs_filblks_t rmapbt_blocks;
|
|
xfs_filblks_t resblks = req->resblks;
|
|
|
|
/*
|
|
* Compute the number of bmbt and rmapbt blocks we might need to handle
|
|
* the estimated number of exchanges.
|
|
*/
|
|
bmbt_blocks = xfs_exchmaps_bmbt_blocks(mp, req);
|
|
rmapbt_blocks = xfs_exchmaps_rmapbt_blocks(mp, req);
|
|
|
|
trace_xfs_exchmaps_overhead(mp, bmbt_blocks, rmapbt_blocks);
|
|
|
|
/* Make sure the change in file block count doesn't overflow. */
|
|
if (check_add_overflow(req->ip1_bcount, bmbt_blocks, &req->ip1_bcount))
|
|
return -EFBIG;
|
|
if (check_add_overflow(req->ip2_bcount, bmbt_blocks, &req->ip2_bcount))
|
|
return -EFBIG;
|
|
|
|
/*
|
|
* Add together the number of blocks we need to handle btree growth,
|
|
* then add it to the number of blocks we need to reserve to this
|
|
* transaction.
|
|
*/
|
|
if (check_add_overflow(resblks, bmbt_blocks, &resblks))
|
|
return -ENOSPC;
|
|
if (check_add_overflow(resblks, bmbt_blocks, &resblks))
|
|
return -ENOSPC;
|
|
if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
|
|
return -ENOSPC;
|
|
if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
|
|
return -ENOSPC;
|
|
|
|
/* Can't actually reserve more than UINT_MAX blocks. */
|
|
if (req->resblks > UINT_MAX)
|
|
return -ENOSPC;
|
|
|
|
req->resblks = resblks;
|
|
trace_xfs_exchmaps_final_estimate(req);
|
|
return 0;
|
|
}
|
|
|
|
/* Decide if we can merge two real mappings. */
|
|
static inline bool
|
|
xmi_can_merge(
|
|
const struct xfs_bmbt_irec *b1,
|
|
const struct xfs_bmbt_irec *b2)
|
|
{
|
|
/* Don't merge holes. */
|
|
if (b1->br_startblock == HOLESTARTBLOCK ||
|
|
b2->br_startblock == HOLESTARTBLOCK)
|
|
return false;
|
|
|
|
/* We don't merge holes. */
|
|
if (!xfs_bmap_is_real_extent(b1) || !xfs_bmap_is_real_extent(b2))
|
|
return false;
|
|
|
|
if (b1->br_startoff + b1->br_blockcount == b2->br_startoff &&
|
|
b1->br_startblock + b1->br_blockcount == b2->br_startblock &&
|
|
b1->br_state == b2->br_state &&
|
|
b1->br_blockcount + b2->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Decide if we can merge three mappings. Caller must ensure all three
|
|
* mappings must not be holes or delalloc reservations.
|
|
*/
|
|
static inline bool
|
|
xmi_can_merge_all(
|
|
const struct xfs_bmbt_irec *l,
|
|
const struct xfs_bmbt_irec *m,
|
|
const struct xfs_bmbt_irec *r)
|
|
{
|
|
xfs_filblks_t new_len;
|
|
|
|
new_len = l->br_blockcount + m->br_blockcount + r->br_blockcount;
|
|
return new_len <= XFS_MAX_BMBT_EXTLEN;
|
|
}
|
|
|
|
/* State bits describing the mapping currently in the file ("C" = current). */
#define CLEFT_CONTIG	(1 << 0)	/* mergeable with left neighbour */
#define CRIGHT_CONTIG	(1 << 1)	/* mergeable with right neighbour */
#define CHOLE		(1 << 2)	/* current mapping is a hole */
#define CBOTH_CONTIG	(CLEFT_CONTIG | CRIGHT_CONTIG)

/* State bits describing the mapping about to be added ("N" = new). */
#define NLEFT_CONTIG	(1 << 4)	/* mergeable with left neighbour */
#define NRIGHT_CONTIG	(1 << 5)	/* mergeable with right neighbour */
#define NHOLE		(1 << 6)	/* new mapping is a hole */
#define NBOTH_CONTIG	(NLEFT_CONTIG | NRIGHT_CONTIG)
|
|
|
|
/* Estimate the effect of a single exchange on mapping count. */
|
|
static inline int
|
|
xmi_delta_nextents_step(
|
|
struct xfs_mount *mp,
|
|
const struct xfs_bmbt_irec *left,
|
|
const struct xfs_bmbt_irec *curr,
|
|
const struct xfs_bmbt_irec *new,
|
|
const struct xfs_bmbt_irec *right)
|
|
{
|
|
bool lhole, rhole, chole, nhole;
|
|
unsigned int state = 0;
|
|
int ret = 0;
|
|
|
|
lhole = left->br_startblock == HOLESTARTBLOCK;
|
|
rhole = right->br_startblock == HOLESTARTBLOCK;
|
|
chole = curr->br_startblock == HOLESTARTBLOCK;
|
|
nhole = new->br_startblock == HOLESTARTBLOCK;
|
|
|
|
if (chole)
|
|
state |= CHOLE;
|
|
if (!lhole && !chole && xmi_can_merge(left, curr))
|
|
state |= CLEFT_CONTIG;
|
|
if (!rhole && !chole && xmi_can_merge(curr, right))
|
|
state |= CRIGHT_CONTIG;
|
|
if ((state & CBOTH_CONTIG) == CBOTH_CONTIG &&
|
|
!xmi_can_merge_all(left, curr, right))
|
|
state &= ~CRIGHT_CONTIG;
|
|
|
|
if (nhole)
|
|
state |= NHOLE;
|
|
if (!lhole && !nhole && xmi_can_merge(left, new))
|
|
state |= NLEFT_CONTIG;
|
|
if (!rhole && !nhole && xmi_can_merge(new, right))
|
|
state |= NRIGHT_CONTIG;
|
|
if ((state & NBOTH_CONTIG) == NBOTH_CONTIG &&
|
|
!xmi_can_merge_all(left, new, right))
|
|
state &= ~NRIGHT_CONTIG;
|
|
|
|
switch (state & (CLEFT_CONTIG | CRIGHT_CONTIG | CHOLE)) {
|
|
case CLEFT_CONTIG | CRIGHT_CONTIG:
|
|
/*
|
|
* left/curr/right are the same mapping, so deleting curr
|
|
* causes 2 new mappings to be created.
|
|
*/
|
|
ret += 2;
|
|
break;
|
|
case 0:
|
|
/*
|
|
* curr is not contiguous with any mapping, so we remove curr
|
|
* completely
|
|
*/
|
|
ret--;
|
|
break;
|
|
case CHOLE:
|
|
/* hole, do nothing */
|
|
break;
|
|
case CLEFT_CONTIG:
|
|
case CRIGHT_CONTIG:
|
|
/* trim either left or right, no change */
|
|
break;
|
|
}
|
|
|
|
switch (state & (NLEFT_CONTIG | NRIGHT_CONTIG | NHOLE)) {
|
|
case NLEFT_CONTIG | NRIGHT_CONTIG:
|
|
/*
|
|
* left/curr/right will become the same mapping, so adding
|
|
* curr causes the deletion of right.
|
|
*/
|
|
ret--;
|
|
break;
|
|
case 0:
|
|
/* new is not contiguous with any mapping */
|
|
ret++;
|
|
break;
|
|
case NHOLE:
|
|
/* hole, do nothing. */
|
|
break;
|
|
case NLEFT_CONTIG:
|
|
case NRIGHT_CONTIG:
|
|
/* new is absorbed into left or right, no change */
|
|
break;
|
|
}
|
|
|
|
trace_xfs_exchmaps_delta_nextents_step(mp, left, curr, new, right, ret,
|
|
state);
|
|
return ret;
|
|
}
|
|
|
|
/* Make sure we don't overflow the extent (mapping) counters. */
|
|
static inline int
|
|
xmi_ensure_delta_nextents(
|
|
struct xfs_exchmaps_req *req,
|
|
struct xfs_inode *ip,
|
|
int64_t delta)
|
|
{
|
|
struct xfs_mount *mp = ip->i_mount;
|
|
int whichfork = xfs_exchmaps_reqfork(req);
|
|
struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
|
|
uint64_t new_nextents;
|
|
xfs_extnum_t max_nextents;
|
|
|
|
if (delta < 0)
|
|
return 0;
|
|
|
|
/*
|
|
* It's always an error if the delta causes integer overflow. delta
|
|
* needs an explicit cast here to avoid warnings about implicit casts
|
|
* coded into the overflow check.
|
|
*/
|
|
if (check_add_overflow(ifp->if_nextents, (uint64_t)delta,
|
|
&new_nextents))
|
|
return -EFBIG;
|
|
|
|
if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) &&
|
|
new_nextents > 10)
|
|
return -EFBIG;
|
|
|
|
/*
|
|
* We always promote both inodes to have large extent counts if the
|
|
* superblock feature is enabled, so we only need to check against the
|
|
* theoretical maximum.
|
|
*/
|
|
max_nextents = xfs_iext_max_nextents(xfs_has_large_extent_counts(mp),
|
|
whichfork);
|
|
if (new_nextents > max_nextents)
|
|
return -EFBIG;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Find the next mapping after irec. */
|
|
static inline int
|
|
xmi_next(
|
|
struct xfs_inode *ip,
|
|
int bmap_flags,
|
|
const struct xfs_bmbt_irec *irec,
|
|
struct xfs_bmbt_irec *nrec)
|
|
{
|
|
xfs_fileoff_t off;
|
|
xfs_filblks_t blockcount;
|
|
int nimaps = 1;
|
|
int error;
|
|
|
|
off = irec->br_startoff + irec->br_blockcount;
|
|
blockcount = XFS_MAX_FILEOFF - off;
|
|
error = xfs_bmapi_read(ip, off, blockcount, nrec, &nimaps, bmap_flags);
|
|
if (error)
|
|
return error;
|
|
if (nrec->br_startblock == DELAYSTARTBLOCK ||
|
|
nrec->br_startoff != off) {
|
|
/*
|
|
* If we don't get the mapping we want, return a zero-length
|
|
* mapping, which our estimator function will pretend is a hole.
|
|
* We shouldn't get delalloc reservations.
|
|
*/
|
|
nrec->br_startblock = HOLESTARTBLOCK;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int __init
|
|
xfs_exchmaps_intent_init_cache(void)
|
|
{
|
|
xfs_exchmaps_intent_cache = kmem_cache_create("xfs_exchmaps_intent",
|
|
sizeof(struct xfs_exchmaps_intent),
|
|
0, 0, NULL);
|
|
|
|
return xfs_exchmaps_intent_cache != NULL ? 0 : -ENOMEM;
|
|
}
|
|
|
|
void
|
|
xfs_exchmaps_intent_destroy_cache(void)
|
|
{
|
|
kmem_cache_destroy(xfs_exchmaps_intent_cache);
|
|
xfs_exchmaps_intent_cache = NULL;
|
|
}
|
|
|
|
/*
|
|
* Decide if we will exchange the reflink flags between the two files after the
|
|
* exchange. The only time we want to do this is if we're exchanging all
|
|
* mappings under EOF and the inode reflink flags have different states.
|
|
*/
|
|
static inline bool
|
|
xmi_can_exchange_reflink_flags(
|
|
const struct xfs_exchmaps_req *req,
|
|
unsigned int reflink_state)
|
|
{
|
|
struct xfs_mount *mp = req->ip1->i_mount;
|
|
|
|
if (hweight32(reflink_state) != 1)
|
|
return false;
|
|
if (req->startoff1 != 0 || req->startoff2 != 0)
|
|
return false;
|
|
if (req->blockcount != XFS_B_TO_FSB(mp, req->ip1->i_disk_size))
|
|
return false;
|
|
if (req->blockcount != XFS_B_TO_FSB(mp, req->ip2->i_disk_size))
|
|
return false;
|
|
return true;
|
|
}
|
|
|
|
|
|
/* Allocate and initialize a new incore intent item from a request. */
|
|
struct xfs_exchmaps_intent *
|
|
xfs_exchmaps_init_intent(
|
|
const struct xfs_exchmaps_req *req)
|
|
{
|
|
struct xfs_exchmaps_intent *xmi;
|
|
unsigned int rs = 0;
|
|
|
|
xmi = kmem_cache_zalloc(xfs_exchmaps_intent_cache,
|
|
GFP_NOFS | __GFP_NOFAIL);
|
|
INIT_LIST_HEAD(&xmi->xmi_list);
|
|
xmi->xmi_ip1 = req->ip1;
|
|
xmi->xmi_ip2 = req->ip2;
|
|
xmi->xmi_startoff1 = req->startoff1;
|
|
xmi->xmi_startoff2 = req->startoff2;
|
|
xmi->xmi_blockcount = req->blockcount;
|
|
xmi->xmi_isize1 = xmi->xmi_isize2 = -1;
|
|
xmi->xmi_flags = req->flags & XFS_EXCHMAPS_PARAMS;
|
|
|
|
if (xfs_exchmaps_whichfork(xmi) == XFS_ATTR_FORK) {
|
|
xmi->xmi_flags |= __XFS_EXCHMAPS_INO2_SHORTFORM;
|
|
return xmi;
|
|
}
|
|
|
|
if (req->flags & XFS_EXCHMAPS_SET_SIZES) {
|
|
xmi->xmi_flags |= XFS_EXCHMAPS_SET_SIZES;
|
|
xmi->xmi_isize1 = req->ip2->i_disk_size;
|
|
xmi->xmi_isize2 = req->ip1->i_disk_size;
|
|
}
|
|
|
|
/* Record the state of each inode's reflink flag before the op. */
|
|
if (xfs_is_reflink_inode(req->ip1))
|
|
rs |= 1;
|
|
if (xfs_is_reflink_inode(req->ip2))
|
|
rs |= 2;
|
|
|
|
/*
|
|
* Figure out if we're clearing the reflink flags (which effectively
|
|
* exchanges them) after the operation.
|
|
*/
|
|
if (xmi_can_exchange_reflink_flags(req, rs)) {
|
|
if (rs & 1)
|
|
xmi->xmi_flags |= XFS_EXCHMAPS_CLEAR_INO1_REFLINK;
|
|
if (rs & 2)
|
|
xmi->xmi_flags |= XFS_EXCHMAPS_CLEAR_INO2_REFLINK;
|
|
}
|
|
|
|
if (S_ISDIR(VFS_I(xmi->xmi_ip2)->i_mode) ||
|
|
S_ISLNK(VFS_I(xmi->xmi_ip2)->i_mode))
|
|
xmi->xmi_flags |= __XFS_EXCHMAPS_INO2_SHORTFORM;
|
|
|
|
return xmi;
|
|
}
|
|
|
|
/*
|
|
* Estimate the number of exchange operations and the number of file blocks
|
|
* in each file that will be affected by the exchange operation.
|
|
*/
|
|
int
|
|
xfs_exchmaps_estimate(
|
|
struct xfs_exchmaps_req *req)
|
|
{
|
|
struct xfs_exchmaps_intent *xmi;
|
|
struct xfs_bmbt_irec irec1, irec2;
|
|
struct xfs_exchmaps_adjacent adj = ADJACENT_INIT;
|
|
xfs_filblks_t ip1_blocks = 0, ip2_blocks = 0;
|
|
int64_t d_nexts1, d_nexts2;
|
|
int bmap_flags;
|
|
int error;
|
|
|
|
ASSERT(!(req->flags & ~XFS_EXCHMAPS_PARAMS));
|
|
|
|
bmap_flags = xfs_bmapi_aflag(xfs_exchmaps_reqfork(req));
|
|
xmi = xfs_exchmaps_init_intent(req);
|
|
|
|
/*
|
|
* To guard against the possibility of overflowing the extent counters,
|
|
* we have to estimate an upper bound on the potential increase in that
|
|
* counter. We can split the mapping at each end of the range, and for
|
|
* each step of the exchange we can split the mapping that we're
|
|
* working on if the mappings do not align.
|
|
*/
|
|
d_nexts1 = d_nexts2 = 3;
|
|
|
|
while (xmi_has_more_exchange_work(xmi)) {
|
|
/*
|
|
* Walk through the file ranges until we find something to
|
|
* exchange. Because we're simulating the exchange, pass in
|
|
* adj to capture skipped mappings for correct estimation of
|
|
* bmbt record merges.
|
|
*/
|
|
error = xfs_exchmaps_find_mappings(xmi, &irec1, &irec2, &adj);
|
|
if (error)
|
|
goto out_free;
|
|
if (!xmi_has_more_exchange_work(xmi))
|
|
break;
|
|
|
|
/* Update accounting. */
|
|
if (xfs_bmap_is_real_extent(&irec1))
|
|
ip1_blocks += irec1.br_blockcount;
|
|
if (xfs_bmap_is_real_extent(&irec2))
|
|
ip2_blocks += irec2.br_blockcount;
|
|
req->nr_exchanges++;
|
|
|
|
/* Read the next mappings from both files. */
|
|
error = xmi_next(req->ip1, bmap_flags, &irec1, &adj.right1);
|
|
if (error)
|
|
goto out_free;
|
|
|
|
error = xmi_next(req->ip2, bmap_flags, &irec2, &adj.right2);
|
|
if (error)
|
|
goto out_free;
|
|
|
|
/* Update extent count deltas. */
|
|
d_nexts1 += xmi_delta_nextents_step(req->ip1->i_mount,
|
|
&adj.left1, &irec1, &irec2, &adj.right1);
|
|
|
|
d_nexts2 += xmi_delta_nextents_step(req->ip1->i_mount,
|
|
&adj.left2, &irec2, &irec1, &adj.right2);
|
|
|
|
/* Now pretend we exchanged the mappings. */
|
|
if (xmi_can_merge(&adj.left2, &irec1))
|
|
adj.left2.br_blockcount += irec1.br_blockcount;
|
|
else
|
|
memcpy(&adj.left2, &irec1, sizeof(irec1));
|
|
|
|
if (xmi_can_merge(&adj.left1, &irec2))
|
|
adj.left1.br_blockcount += irec2.br_blockcount;
|
|
else
|
|
memcpy(&adj.left1, &irec2, sizeof(irec2));
|
|
|
|
xmi_advance(xmi, &irec1);
|
|
}
|
|
|
|
/* Account for the blocks that are being exchanged. */
|
|
if (XFS_IS_REALTIME_INODE(req->ip1) &&
|
|
xfs_exchmaps_reqfork(req) == XFS_DATA_FORK) {
|
|
req->ip1_rtbcount = ip1_blocks;
|
|
req->ip2_rtbcount = ip2_blocks;
|
|
} else {
|
|
req->ip1_bcount = ip1_blocks;
|
|
req->ip2_bcount = ip2_blocks;
|
|
}
|
|
|
|
/*
|
|
* Make sure that both forks have enough slack left in their extent
|
|
* counters that the exchange operation will not overflow.
|
|
*/
|
|
trace_xfs_exchmaps_delta_nextents(req, d_nexts1, d_nexts2);
|
|
if (req->ip1 == req->ip2) {
|
|
error = xmi_ensure_delta_nextents(req, req->ip1,
|
|
d_nexts1 + d_nexts2);
|
|
} else {
|
|
error = xmi_ensure_delta_nextents(req, req->ip1, d_nexts1);
|
|
if (error)
|
|
goto out_free;
|
|
error = xmi_ensure_delta_nextents(req, req->ip2, d_nexts2);
|
|
}
|
|
if (error)
|
|
goto out_free;
|
|
|
|
trace_xfs_exchmaps_initial_estimate(req);
|
|
error = xfs_exchmaps_estimate_overhead(req);
|
|
out_free:
|
|
kmem_cache_free(xfs_exchmaps_intent_cache, xmi);
|
|
return error;
|
|
}
|
|
|
|
/* Set the reflink flag before an operation. */
|
|
static inline void
|
|
xfs_exchmaps_set_reflink(
|
|
struct xfs_trans *tp,
|
|
struct xfs_inode *ip)
|
|
{
|
|
trace_xfs_reflink_set_inode_flag(ip);
|
|
|
|
ip->i_diflags2 |= XFS_DIFLAG2_REFLINK;
|
|
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
|
|
}
|
|
|
|
/*
|
|
* If either file has shared blocks and we're exchanging data forks, we must
|
|
* flag the other file as having shared blocks so that we get the shared-block
|
|
* rmap functions if we need to fix up the rmaps.
|
|
*/
|
|
void
|
|
xfs_exchmaps_ensure_reflink(
|
|
struct xfs_trans *tp,
|
|
const struct xfs_exchmaps_intent *xmi)
|
|
{
|
|
unsigned int rs = 0;
|
|
|
|
if (xfs_is_reflink_inode(xmi->xmi_ip1))
|
|
rs |= 1;
|
|
if (xfs_is_reflink_inode(xmi->xmi_ip2))
|
|
rs |= 2;
|
|
|
|
if ((rs & 1) && !xfs_is_reflink_inode(xmi->xmi_ip2))
|
|
xfs_exchmaps_set_reflink(tp, xmi->xmi_ip2);
|
|
|
|
if ((rs & 2) && !xfs_is_reflink_inode(xmi->xmi_ip1))
|
|
xfs_exchmaps_set_reflink(tp, xmi->xmi_ip1);
|
|
}
|
|
|
|
/* Set the large extent count flag before an operation if needed. */
|
|
static inline void
|
|
xfs_exchmaps_ensure_large_extent_counts(
|
|
struct xfs_trans *tp,
|
|
struct xfs_inode *ip)
|
|
{
|
|
if (xfs_inode_has_large_extent_counts(ip))
|
|
return;
|
|
|
|
ip->i_diflags2 |= XFS_DIFLAG2_NREXT64;
|
|
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
|
|
}
|
|
|
|
/* Widen the extent counter fields of both inodes if necessary. */
|
|
void
|
|
xfs_exchmaps_upgrade_extent_counts(
|
|
struct xfs_trans *tp,
|
|
const struct xfs_exchmaps_intent *xmi)
|
|
{
|
|
if (!xfs_has_large_extent_counts(tp->t_mountp))
|
|
return;
|
|
|
|
xfs_exchmaps_ensure_large_extent_counts(tp, xmi->xmi_ip1);
|
|
xfs_exchmaps_ensure_large_extent_counts(tp, xmi->xmi_ip2);
|
|
}
|
|
|
|
/*
|
|
* Schedule an exchange a range of mappings from one inode to another.
|
|
*
|
|
* The use of file mapping exchange log intent items ensures the operation can
|
|
* be resumed even if the system goes down. The caller must commit the
|
|
* transaction to start the work.
|
|
*
|
|
* The caller must ensure the inodes must be joined to the transaction and
|
|
* ILOCKd; they will still be joined to the transaction at exit.
|
|
*/
|
|
void
|
|
xfs_exchange_mappings(
|
|
struct xfs_trans *tp,
|
|
const struct xfs_exchmaps_req *req)
|
|
{
|
|
struct xfs_exchmaps_intent *xmi;
|
|
|
|
BUILD_BUG_ON(XFS_EXCHMAPS_INTERNAL_FLAGS & XFS_EXCHMAPS_LOGGED_FLAGS);
|
|
|
|
xfs_assert_ilocked(req->ip1, XFS_ILOCK_EXCL);
|
|
xfs_assert_ilocked(req->ip2, XFS_ILOCK_EXCL);
|
|
ASSERT(!(req->flags & ~XFS_EXCHMAPS_LOGGED_FLAGS));
|
|
if (req->flags & XFS_EXCHMAPS_SET_SIZES)
|
|
ASSERT(!(req->flags & XFS_EXCHMAPS_ATTR_FORK));
|
|
ASSERT(xfs_has_exchange_range(tp->t_mountp));
|
|
|
|
if (req->blockcount == 0)
|
|
return;
|
|
|
|
xmi = xfs_exchmaps_init_intent(req);
|
|
xfs_exchmaps_defer_add(tp, xmi);
|
|
xfs_exchmaps_ensure_reflink(tp, xmi);
|
|
xfs_exchmaps_upgrade_extent_counts(tp, xmi);
|
|
}
|