c24b5dfadc
Now we have xfs_inode.c for holding kernel-only XFS inode operations, move all the inode operations from xfs_vnodeops.c to this new file as it holds another set of kernel-only inode operations. The name of this file traces back to the days of Irix and it's vnodes which we don't have anymore. Essentially this move consolidates the inode locking functions and a bunch of XFS inode operations into the one file. Eventually the high level functions will be merged into the VFS interface functions in xfs_iops.c. This leaves only internal preallocation, EOF block manipulation and hole punching functions in vnodeops.c. Move these to xfs_bmap_util.c where we are already consolidating various in-kernel physical extent manipulation and querying functions. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Mark Tinguely <tinguely@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
1696 lines
44 KiB
C
1696 lines
44 KiB
C
/*
|
|
* Copyright (c) 2000-2006 Silicon Graphics, Inc.
|
|
* Copyright (c) 2012 Red Hat, Inc.
|
|
* All Rights Reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it would be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write the Free Software Foundation,
|
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
#include "xfs.h"
|
|
#include "xfs_fs.h"
|
|
#include "xfs_format.h"
|
|
#include "xfs_bit.h"
|
|
#include "xfs_log.h"
|
|
#include "xfs_inum.h"
|
|
#include "xfs_trans.h"
|
|
#include "xfs_sb.h"
|
|
#include "xfs_ag.h"
|
|
#include "xfs_mount.h"
|
|
#include "xfs_da_btree.h"
|
|
#include "xfs_bmap_btree.h"
|
|
#include "xfs_alloc_btree.h"
|
|
#include "xfs_ialloc_btree.h"
|
|
#include "xfs_dinode.h"
|
|
#include "xfs_inode.h"
|
|
#include "xfs_btree.h"
|
|
#include "xfs_extfree_item.h"
|
|
#include "xfs_alloc.h"
|
|
#include "xfs_bmap.h"
|
|
#include "xfs_bmap_util.h"
|
|
#include "xfs_rtalloc.h"
|
|
#include "xfs_error.h"
|
|
#include "xfs_quota.h"
|
|
#include "xfs_trans_space.h"
|
|
#include "xfs_trace.h"
|
|
#include "xfs_icache.h"
|
|
|
|
/* Kernel only BMAP related definitions and functions */
|
|
|
|
/*
|
|
* Convert the given file system block to a disk block. We have to treat it
|
|
* differently based on whether the file is a real time file or not, because the
|
|
* bmap code does.
|
|
*/
|
|
xfs_daddr_t
|
|
xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
|
|
{
|
|
return (XFS_IS_REALTIME_INODE(ip) ? \
|
|
(xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \
|
|
XFS_FSB_TO_DADDR((ip)->i_mount, (fsb)));
|
|
}
|
|
|
|
/*
|
|
* Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
|
|
* caller. Frees all the extents that need freeing, which must be done
|
|
* last due to locking considerations. We never free any extents in
|
|
* the first transaction.
|
|
*
|
|
* Return 1 if the given transaction was committed and a new one
|
|
* started, and 0 otherwise in the committed parameter.
|
|
*/
|
|
int /* error */
|
|
xfs_bmap_finish(
|
|
xfs_trans_t **tp, /* transaction pointer addr */
|
|
xfs_bmap_free_t *flist, /* i/o: list extents to free */
|
|
int *committed) /* xact committed or not */
|
|
{
|
|
xfs_efd_log_item_t *efd; /* extent free data */
|
|
xfs_efi_log_item_t *efi; /* extent free intention */
|
|
int error; /* error return value */
|
|
xfs_bmap_free_item_t *free; /* free extent item */
|
|
unsigned int logres; /* new log reservation */
|
|
unsigned int logcount; /* new log count */
|
|
xfs_mount_t *mp; /* filesystem mount structure */
|
|
xfs_bmap_free_item_t *next; /* next item on free list */
|
|
xfs_trans_t *ntp; /* new transaction pointer */
|
|
|
|
ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
|
|
if (flist->xbf_count == 0) {
|
|
*committed = 0;
|
|
return 0;
|
|
}
|
|
ntp = *tp;
|
|
efi = xfs_trans_get_efi(ntp, flist->xbf_count);
|
|
for (free = flist->xbf_first; free; free = free->xbfi_next)
|
|
xfs_trans_log_efi_extent(ntp, efi, free->xbfi_startblock,
|
|
free->xbfi_blockcount);
|
|
logres = ntp->t_log_res;
|
|
logcount = ntp->t_log_count;
|
|
ntp = xfs_trans_dup(*tp);
|
|
error = xfs_trans_commit(*tp, 0);
|
|
*tp = ntp;
|
|
*committed = 1;
|
|
/*
|
|
* We have a new transaction, so we should return committed=1,
|
|
* even though we're returning an error.
|
|
*/
|
|
if (error)
|
|
return error;
|
|
|
|
/*
|
|
* transaction commit worked ok so we can drop the extra ticket
|
|
* reference that we gained in xfs_trans_dup()
|
|
*/
|
|
xfs_log_ticket_put(ntp->t_ticket);
|
|
|
|
if ((error = xfs_trans_reserve(ntp, 0, logres, 0, XFS_TRANS_PERM_LOG_RES,
|
|
logcount)))
|
|
return error;
|
|
efd = xfs_trans_get_efd(ntp, efi, flist->xbf_count);
|
|
for (free = flist->xbf_first; free != NULL; free = next) {
|
|
next = free->xbfi_next;
|
|
if ((error = xfs_free_extent(ntp, free->xbfi_startblock,
|
|
free->xbfi_blockcount))) {
|
|
/*
|
|
* The bmap free list will be cleaned up at a
|
|
* higher level. The EFI will be canceled when
|
|
* this transaction is aborted.
|
|
* Need to force shutdown here to make sure it
|
|
* happens, since this transaction may not be
|
|
* dirty yet.
|
|
*/
|
|
mp = ntp->t_mountp;
|
|
if (!XFS_FORCED_SHUTDOWN(mp))
|
|
xfs_force_shutdown(mp,
|
|
(error == EFSCORRUPTED) ?
|
|
SHUTDOWN_CORRUPT_INCORE :
|
|
SHUTDOWN_META_IO_ERROR);
|
|
return error;
|
|
}
|
|
xfs_trans_log_efd_extent(ntp, efd, free->xbfi_startblock,
|
|
free->xbfi_blockcount);
|
|
xfs_bmap_del_free(flist, NULL, free);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
xfs_bmap_rtalloc(
|
|
struct xfs_bmalloca *ap) /* bmap alloc argument struct */
|
|
{
|
|
xfs_alloctype_t atype = 0; /* type for allocation routines */
|
|
int error; /* error return value */
|
|
xfs_mount_t *mp; /* mount point structure */
|
|
xfs_extlen_t prod = 0; /* product factor for allocators */
|
|
xfs_extlen_t ralen = 0; /* realtime allocation length */
|
|
xfs_extlen_t align; /* minimum allocation alignment */
|
|
xfs_rtblock_t rtb;
|
|
|
|
mp = ap->ip->i_mount;
|
|
align = xfs_get_extsz_hint(ap->ip);
|
|
prod = align / mp->m_sb.sb_rextsize;
|
|
error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
|
|
align, 1, ap->eof, 0,
|
|
ap->conv, &ap->offset, &ap->length);
|
|
if (error)
|
|
return error;
|
|
ASSERT(ap->length);
|
|
ASSERT(ap->length % mp->m_sb.sb_rextsize == 0);
|
|
|
|
/*
|
|
* If the offset & length are not perfectly aligned
|
|
* then kill prod, it will just get us in trouble.
|
|
*/
|
|
if (do_mod(ap->offset, align) || ap->length % align)
|
|
prod = 1;
|
|
/*
|
|
* Set ralen to be the actual requested length in rtextents.
|
|
*/
|
|
ralen = ap->length / mp->m_sb.sb_rextsize;
|
|
/*
|
|
* If the old value was close enough to MAXEXTLEN that
|
|
* we rounded up to it, cut it back so it's valid again.
|
|
* Note that if it's a really large request (bigger than
|
|
* MAXEXTLEN), we don't hear about that number, and can't
|
|
* adjust the starting point to match it.
|
|
*/
|
|
if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN)
|
|
ralen = MAXEXTLEN / mp->m_sb.sb_rextsize;
|
|
|
|
/*
|
|
* Lock out other modifications to the RT bitmap inode.
|
|
*/
|
|
xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
|
|
xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
|
|
|
|
/*
|
|
* If it's an allocation to an empty file at offset 0,
|
|
* pick an extent that will space things out in the rt area.
|
|
*/
|
|
if (ap->eof && ap->offset == 0) {
|
|
xfs_rtblock_t uninitialized_var(rtx); /* realtime extent no */
|
|
|
|
error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
|
|
if (error)
|
|
return error;
|
|
ap->blkno = rtx * mp->m_sb.sb_rextsize;
|
|
} else {
|
|
ap->blkno = 0;
|
|
}
|
|
|
|
xfs_bmap_adjacent(ap);
|
|
|
|
/*
|
|
* Realtime allocation, done through xfs_rtallocate_extent.
|
|
*/
|
|
atype = ap->blkno == 0 ? XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO;
|
|
do_div(ap->blkno, mp->m_sb.sb_rextsize);
|
|
rtb = ap->blkno;
|
|
ap->length = ralen;
|
|
if ((error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length,
|
|
&ralen, atype, ap->wasdel, prod, &rtb)))
|
|
return error;
|
|
if (rtb == NULLFSBLOCK && prod > 1 &&
|
|
(error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1,
|
|
ap->length, &ralen, atype,
|
|
ap->wasdel, 1, &rtb)))
|
|
return error;
|
|
ap->blkno = rtb;
|
|
if (ap->blkno != NULLFSBLOCK) {
|
|
ap->blkno *= mp->m_sb.sb_rextsize;
|
|
ralen *= mp->m_sb.sb_rextsize;
|
|
ap->length = ralen;
|
|
ap->ip->i_d.di_nblocks += ralen;
|
|
xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
|
|
if (ap->wasdel)
|
|
ap->ip->i_delayed_blks -= ralen;
|
|
/*
|
|
* Adjust the disk quota also. This was reserved
|
|
* earlier.
|
|
*/
|
|
xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
|
|
ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
|
|
XFS_TRANS_DQ_RTBCOUNT, (long) ralen);
|
|
} else {
|
|
ap->length = 0;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Stack switching interfaces for allocation
|
|
*/
|
|
static void
|
|
xfs_bmapi_allocate_worker(
|
|
struct work_struct *work)
|
|
{
|
|
struct xfs_bmalloca *args = container_of(work,
|
|
struct xfs_bmalloca, work);
|
|
unsigned long pflags;
|
|
|
|
/* we are in a transaction context here */
|
|
current_set_flags_nested(&pflags, PF_FSTRANS);
|
|
|
|
args->result = __xfs_bmapi_allocate(args);
|
|
complete(args->done);
|
|
|
|
current_restore_flags_nested(&pflags, PF_FSTRANS);
|
|
}
|
|
|
|
/*
|
|
* Some allocation requests often come in with little stack to work on. Push
|
|
* them off to a worker thread so there is lots of stack to use. Otherwise just
|
|
* call directly to avoid the context switch overhead here.
|
|
*/
|
|
int
|
|
xfs_bmapi_allocate(
|
|
struct xfs_bmalloca *args)
|
|
{
|
|
DECLARE_COMPLETION_ONSTACK(done);
|
|
|
|
if (!args->stack_switch)
|
|
return __xfs_bmapi_allocate(args);
|
|
|
|
|
|
args->done = &done;
|
|
INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
|
|
queue_work(xfs_alloc_wq, &args->work);
|
|
wait_for_completion(&done);
|
|
return args->result;
|
|
}
|
|
|
|
/*
|
|
* Check if the endoff is outside the last extent. If so the caller will grow
|
|
* the allocation to a stripe unit boundary. All offsets are considered outside
|
|
* the end of file for an empty fork, so 1 is returned in *eof in that case.
|
|
*/
|
|
int
|
|
xfs_bmap_eof(
|
|
struct xfs_inode *ip,
|
|
xfs_fileoff_t endoff,
|
|
int whichfork,
|
|
int *eof)
|
|
{
|
|
struct xfs_bmbt_irec rec;
|
|
int error;
|
|
|
|
error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, eof);
|
|
if (error || *eof)
|
|
return error;
|
|
|
|
*eof = endoff >= rec.br_startoff + rec.br_blockcount;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Extent tree block counting routines.
|
|
*/
|
|
|
|
/*
|
|
* Count leaf blocks given a range of extent records.
|
|
*/
|
|
STATIC void
|
|
xfs_bmap_count_leaves(
|
|
xfs_ifork_t *ifp,
|
|
xfs_extnum_t idx,
|
|
int numrecs,
|
|
int *count)
|
|
{
|
|
int b;
|
|
|
|
for (b = 0; b < numrecs; b++) {
|
|
xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b);
|
|
*count += xfs_bmbt_get_blockcount(frp);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Count leaf blocks given a range of extent records originally
|
|
* in btree format.
|
|
*/
|
|
STATIC void
|
|
xfs_bmap_disk_count_leaves(
|
|
struct xfs_mount *mp,
|
|
struct xfs_btree_block *block,
|
|
int numrecs,
|
|
int *count)
|
|
{
|
|
int b;
|
|
xfs_bmbt_rec_t *frp;
|
|
|
|
for (b = 1; b <= numrecs; b++) {
|
|
frp = XFS_BMBT_REC_ADDR(mp, block, b);
|
|
*count += xfs_bmbt_disk_get_blockcount(frp);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Recursively walks each level of a btree
|
|
* to count total fsblocks is use.
|
|
*/
|
|
STATIC int /* error */
|
|
xfs_bmap_count_tree(
|
|
xfs_mount_t *mp, /* file system mount point */
|
|
xfs_trans_t *tp, /* transaction pointer */
|
|
xfs_ifork_t *ifp, /* inode fork pointer */
|
|
xfs_fsblock_t blockno, /* file system block number */
|
|
int levelin, /* level in btree */
|
|
int *count) /* Count of blocks */
|
|
{
|
|
int error;
|
|
xfs_buf_t *bp, *nbp;
|
|
int level = levelin;
|
|
__be64 *pp;
|
|
xfs_fsblock_t bno = blockno;
|
|
xfs_fsblock_t nextbno;
|
|
struct xfs_btree_block *block, *nextblock;
|
|
int numrecs;
|
|
|
|
error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF,
|
|
&xfs_bmbt_buf_ops);
|
|
if (error)
|
|
return error;
|
|
*count += 1;
|
|
block = XFS_BUF_TO_BLOCK(bp);
|
|
|
|
if (--level) {
|
|
/* Not at node above leaves, count this level of nodes */
|
|
nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
|
|
while (nextbno != NULLFSBLOCK) {
|
|
error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp,
|
|
XFS_BMAP_BTREE_REF,
|
|
&xfs_bmbt_buf_ops);
|
|
if (error)
|
|
return error;
|
|
*count += 1;
|
|
nextblock = XFS_BUF_TO_BLOCK(nbp);
|
|
nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib);
|
|
xfs_trans_brelse(tp, nbp);
|
|
}
|
|
|
|
/* Dive to the next level */
|
|
pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
|
|
bno = be64_to_cpu(*pp);
|
|
if (unlikely((error =
|
|
xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) {
|
|
xfs_trans_brelse(tp, bp);
|
|
XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
|
|
XFS_ERRLEVEL_LOW, mp);
|
|
return XFS_ERROR(EFSCORRUPTED);
|
|
}
|
|
xfs_trans_brelse(tp, bp);
|
|
} else {
|
|
/* count all level 1 nodes and their leaves */
|
|
for (;;) {
|
|
nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
|
|
numrecs = be16_to_cpu(block->bb_numrecs);
|
|
xfs_bmap_disk_count_leaves(mp, block, numrecs, count);
|
|
xfs_trans_brelse(tp, bp);
|
|
if (nextbno == NULLFSBLOCK)
|
|
break;
|
|
bno = nextbno;
|
|
error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
|
|
XFS_BMAP_BTREE_REF,
|
|
&xfs_bmbt_buf_ops);
|
|
if (error)
|
|
return error;
|
|
*count += 1;
|
|
block = XFS_BUF_TO_BLOCK(bp);
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Count fsblocks of the given fork.
|
|
*/
|
|
int /* error */
|
|
xfs_bmap_count_blocks(
|
|
xfs_trans_t *tp, /* transaction pointer */
|
|
xfs_inode_t *ip, /* incore inode */
|
|
int whichfork, /* data or attr fork */
|
|
int *count) /* out: count of blocks */
|
|
{
|
|
struct xfs_btree_block *block; /* current btree block */
|
|
xfs_fsblock_t bno; /* block # of "block" */
|
|
xfs_ifork_t *ifp; /* fork structure */
|
|
int level; /* btree level, for checking */
|
|
xfs_mount_t *mp; /* file system mount structure */
|
|
__be64 *pp; /* pointer to block address */
|
|
|
|
bno = NULLFSBLOCK;
|
|
mp = ip->i_mount;
|
|
ifp = XFS_IFORK_PTR(ip, whichfork);
|
|
if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) {
|
|
xfs_bmap_count_leaves(ifp, 0,
|
|
ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t),
|
|
count);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
|
|
*/
|
|
block = ifp->if_broot;
|
|
level = be16_to_cpu(block->bb_level);
|
|
ASSERT(level > 0);
|
|
pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
|
|
bno = be64_to_cpu(*pp);
|
|
ASSERT(bno != NULLDFSBNO);
|
|
ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
|
|
ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
|
|
|
|
if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {
|
|
XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
|
|
mp);
|
|
return XFS_ERROR(EFSCORRUPTED);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* returns 1 for success, 0 if we failed to map the extent.
|
|
*/
|
|
STATIC int
|
|
xfs_getbmapx_fix_eof_hole(
|
|
xfs_inode_t *ip, /* xfs incore inode pointer */
|
|
struct getbmapx *out, /* output structure */
|
|
int prealloced, /* this is a file with
|
|
* preallocated data space */
|
|
__int64_t end, /* last block requested */
|
|
xfs_fsblock_t startblock)
|
|
{
|
|
__int64_t fixlen;
|
|
xfs_mount_t *mp; /* file system mount point */
|
|
xfs_ifork_t *ifp; /* inode fork pointer */
|
|
xfs_extnum_t lastx; /* last extent pointer */
|
|
xfs_fileoff_t fileblock;
|
|
|
|
if (startblock == HOLESTARTBLOCK) {
|
|
mp = ip->i_mount;
|
|
out->bmv_block = -1;
|
|
fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
|
|
fixlen -= out->bmv_offset;
|
|
if (prealloced && out->bmv_offset + out->bmv_length == end) {
|
|
/* Came to hole at EOF. Trim it. */
|
|
if (fixlen <= 0)
|
|
return 0;
|
|
out->bmv_length = fixlen;
|
|
}
|
|
} else {
|
|
if (startblock == DELAYSTARTBLOCK)
|
|
out->bmv_block = -2;
|
|
else
|
|
out->bmv_block = xfs_fsb_to_db(ip, startblock);
|
|
fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset);
|
|
ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
|
|
if (xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
|
|
(lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1))
|
|
out->bmv_oflags |= BMV_OF_LAST;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* Get inode's extents as described in bmv, and format for output.
|
|
* Calls formatter to fill the user's buffer until all extents
|
|
* are mapped, until the passed-in bmv->bmv_count slots have
|
|
* been filled, or until the formatter short-circuits the loop,
|
|
* if it is tracking filled-in extents on its own.
|
|
*/
|
|
int /* error code */
|
|
xfs_getbmap(
|
|
xfs_inode_t *ip,
|
|
struct getbmapx *bmv, /* user bmap structure */
|
|
xfs_bmap_format_t formatter, /* format to user */
|
|
void *arg) /* formatter arg */
|
|
{
|
|
__int64_t bmvend; /* last block requested */
|
|
int error = 0; /* return value */
|
|
__int64_t fixlen; /* length for -1 case */
|
|
int i; /* extent number */
|
|
int lock; /* lock state */
|
|
xfs_bmbt_irec_t *map; /* buffer for user's data */
|
|
xfs_mount_t *mp; /* file system mount point */
|
|
int nex; /* # of user extents can do */
|
|
int nexleft; /* # of user extents left */
|
|
int subnex; /* # of bmapi's can do */
|
|
int nmap; /* number of map entries */
|
|
struct getbmapx *out; /* output structure */
|
|
int whichfork; /* data or attr fork */
|
|
int prealloced; /* this is a file with
|
|
* preallocated data space */
|
|
int iflags; /* interface flags */
|
|
int bmapi_flags; /* flags for xfs_bmapi */
|
|
int cur_ext = 0;
|
|
|
|
mp = ip->i_mount;
|
|
iflags = bmv->bmv_iflags;
|
|
whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK;
|
|
|
|
if (whichfork == XFS_ATTR_FORK) {
|
|
if (XFS_IFORK_Q(ip)) {
|
|
if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
|
|
ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
|
|
ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
|
|
return XFS_ERROR(EINVAL);
|
|
} else if (unlikely(
|
|
ip->i_d.di_aformat != 0 &&
|
|
ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
|
|
XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
|
|
ip->i_mount);
|
|
return XFS_ERROR(EFSCORRUPTED);
|
|
}
|
|
|
|
prealloced = 0;
|
|
fixlen = 1LL << 32;
|
|
} else {
|
|
if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
|
|
ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
|
|
ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
|
|
return XFS_ERROR(EINVAL);
|
|
|
|
if (xfs_get_extsz_hint(ip) ||
|
|
ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
|
|
prealloced = 1;
|
|
fixlen = mp->m_super->s_maxbytes;
|
|
} else {
|
|
prealloced = 0;
|
|
fixlen = XFS_ISIZE(ip);
|
|
}
|
|
}
|
|
|
|
if (bmv->bmv_length == -1) {
|
|
fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
|
|
bmv->bmv_length =
|
|
max_t(__int64_t, fixlen - bmv->bmv_offset, 0);
|
|
} else if (bmv->bmv_length == 0) {
|
|
bmv->bmv_entries = 0;
|
|
return 0;
|
|
} else if (bmv->bmv_length < 0) {
|
|
return XFS_ERROR(EINVAL);
|
|
}
|
|
|
|
nex = bmv->bmv_count - 1;
|
|
if (nex <= 0)
|
|
return XFS_ERROR(EINVAL);
|
|
bmvend = bmv->bmv_offset + bmv->bmv_length;
|
|
|
|
|
|
if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
|
|
return XFS_ERROR(ENOMEM);
|
|
out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL);
|
|
if (!out) {
|
|
out = kmem_zalloc_large(bmv->bmv_count *
|
|
sizeof(struct getbmapx));
|
|
if (!out)
|
|
return XFS_ERROR(ENOMEM);
|
|
}
|
|
|
|
xfs_ilock(ip, XFS_IOLOCK_SHARED);
|
|
if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
|
|
if (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size) {
|
|
error = -filemap_write_and_wait(VFS_I(ip)->i_mapping);
|
|
if (error)
|
|
goto out_unlock_iolock;
|
|
}
|
|
/*
|
|
* even after flushing the inode, there can still be delalloc
|
|
* blocks on the inode beyond EOF due to speculative
|
|
* preallocation. These are not removed until the release
|
|
* function is called or the inode is inactivated. Hence we
|
|
* cannot assert here that ip->i_delayed_blks == 0.
|
|
*/
|
|
}
|
|
|
|
lock = xfs_ilock_map_shared(ip);
|
|
|
|
/*
|
|
* Don't let nex be bigger than the number of extents
|
|
* we can have assuming alternating holes and real extents.
|
|
*/
|
|
if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1)
|
|
nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;
|
|
|
|
bmapi_flags = xfs_bmapi_aflag(whichfork);
|
|
if (!(iflags & BMV_IF_PREALLOC))
|
|
bmapi_flags |= XFS_BMAPI_IGSTATE;
|
|
|
|
/*
|
|
* Allocate enough space to handle "subnex" maps at a time.
|
|
*/
|
|
error = ENOMEM;
|
|
subnex = 16;
|
|
map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
|
|
if (!map)
|
|
goto out_unlock_ilock;
|
|
|
|
bmv->bmv_entries = 0;
|
|
|
|
if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 &&
|
|
(whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
|
|
error = 0;
|
|
goto out_free_map;
|
|
}
|
|
|
|
nexleft = nex;
|
|
|
|
do {
|
|
nmap = (nexleft > subnex) ? subnex : nexleft;
|
|
error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
|
|
XFS_BB_TO_FSB(mp, bmv->bmv_length),
|
|
map, &nmap, bmapi_flags);
|
|
if (error)
|
|
goto out_free_map;
|
|
ASSERT(nmap <= subnex);
|
|
|
|
for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) {
|
|
out[cur_ext].bmv_oflags = 0;
|
|
if (map[i].br_state == XFS_EXT_UNWRITTEN)
|
|
out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
|
|
else if (map[i].br_startblock == DELAYSTARTBLOCK)
|
|
out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC;
|
|
out[cur_ext].bmv_offset =
|
|
XFS_FSB_TO_BB(mp, map[i].br_startoff);
|
|
out[cur_ext].bmv_length =
|
|
XFS_FSB_TO_BB(mp, map[i].br_blockcount);
|
|
out[cur_ext].bmv_unused1 = 0;
|
|
out[cur_ext].bmv_unused2 = 0;
|
|
|
|
/*
|
|
* delayed allocation extents that start beyond EOF can
|
|
* occur due to speculative EOF allocation when the
|
|
* delalloc extent is larger than the largest freespace
|
|
* extent at conversion time. These extents cannot be
|
|
* converted by data writeback, so can exist here even
|
|
* if we are not supposed to be finding delalloc
|
|
* extents.
|
|
*/
|
|
if (map[i].br_startblock == DELAYSTARTBLOCK &&
|
|
map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
|
|
ASSERT((iflags & BMV_IF_DELALLOC) != 0);
|
|
|
|
if (map[i].br_startblock == HOLESTARTBLOCK &&
|
|
whichfork == XFS_ATTR_FORK) {
|
|
/* came to the end of attribute fork */
|
|
out[cur_ext].bmv_oflags |= BMV_OF_LAST;
|
|
goto out_free_map;
|
|
}
|
|
|
|
if (!xfs_getbmapx_fix_eof_hole(ip, &out[cur_ext],
|
|
prealloced, bmvend,
|
|
map[i].br_startblock))
|
|
goto out_free_map;
|
|
|
|
bmv->bmv_offset =
|
|
out[cur_ext].bmv_offset +
|
|
out[cur_ext].bmv_length;
|
|
bmv->bmv_length =
|
|
max_t(__int64_t, 0, bmvend - bmv->bmv_offset);
|
|
|
|
/*
|
|
* In case we don't want to return the hole,
|
|
* don't increase cur_ext so that we can reuse
|
|
* it in the next loop.
|
|
*/
|
|
if ((iflags & BMV_IF_NO_HOLES) &&
|
|
map[i].br_startblock == HOLESTARTBLOCK) {
|
|
memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
|
|
continue;
|
|
}
|
|
|
|
nexleft--;
|
|
bmv->bmv_entries++;
|
|
cur_ext++;
|
|
}
|
|
} while (nmap && nexleft && bmv->bmv_length);
|
|
|
|
out_free_map:
|
|
kmem_free(map);
|
|
out_unlock_ilock:
|
|
xfs_iunlock_map_shared(ip, lock);
|
|
out_unlock_iolock:
|
|
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
|
|
|
|
for (i = 0; i < cur_ext; i++) {
|
|
int full = 0; /* user array is full */
|
|
|
|
/* format results & advance arg */
|
|
error = formatter(&arg, &out[i], &full);
|
|
if (error || full)
|
|
break;
|
|
}
|
|
|
|
if (is_vmalloc_addr(out))
|
|
kmem_free_large(out);
|
|
else
|
|
kmem_free(out);
|
|
return error;
|
|
}
|
|
|
|
/*
|
|
* dead simple method of punching delalyed allocation blocks from a range in
|
|
* the inode. Walks a block at a time so will be slow, but is only executed in
|
|
* rare error cases so the overhead is not critical. This will alays punch out
|
|
* both the start and end blocks, even if the ranges only partially overlap
|
|
* them, so it is up to the caller to ensure that partial blocks are not
|
|
* passed in.
|
|
*/
|
|
int
|
|
xfs_bmap_punch_delalloc_range(
|
|
struct xfs_inode *ip,
|
|
xfs_fileoff_t start_fsb,
|
|
xfs_fileoff_t length)
|
|
{
|
|
xfs_fileoff_t remaining = length;
|
|
int error = 0;
|
|
|
|
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
|
|
|
|
do {
|
|
int done;
|
|
xfs_bmbt_irec_t imap;
|
|
int nimaps = 1;
|
|
xfs_fsblock_t firstblock;
|
|
xfs_bmap_free_t flist;
|
|
|
|
/*
|
|
* Map the range first and check that it is a delalloc extent
|
|
* before trying to unmap the range. Otherwise we will be
|
|
* trying to remove a real extent (which requires a
|
|
* transaction) or a hole, which is probably a bad idea...
|
|
*/
|
|
error = xfs_bmapi_read(ip, start_fsb, 1, &imap, &nimaps,
|
|
XFS_BMAPI_ENTIRE);
|
|
|
|
if (error) {
|
|
/* something screwed, just bail */
|
|
if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
|
|
xfs_alert(ip->i_mount,
|
|
"Failed delalloc mapping lookup ino %lld fsb %lld.",
|
|
ip->i_ino, start_fsb);
|
|
}
|
|
break;
|
|
}
|
|
if (!nimaps) {
|
|
/* nothing there */
|
|
goto next_block;
|
|
}
|
|
if (imap.br_startblock != DELAYSTARTBLOCK) {
|
|
/* been converted, ignore */
|
|
goto next_block;
|
|
}
|
|
WARN_ON(imap.br_blockcount == 0);
|
|
|
|
/*
|
|
* Note: while we initialise the firstblock/flist pair, they
|
|
* should never be used because blocks should never be
|
|
* allocated or freed for a delalloc extent and hence we need
|
|
* don't cancel or finish them after the xfs_bunmapi() call.
|
|
*/
|
|
xfs_bmap_init(&flist, &firstblock);
|
|
error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock,
|
|
&flist, &done);
|
|
if (error)
|
|
break;
|
|
|
|
ASSERT(!flist.xbf_count && !flist.xbf_first);
|
|
next_block:
|
|
start_fsb++;
|
|
remaining--;
|
|
} while(remaining > 0);
|
|
|
|
return error;
|
|
}
|
|
|
|
/*
|
|
* Test whether it is appropriate to check an inode for and free post EOF
|
|
* blocks. The 'force' parameter determines whether we should also consider
|
|
* regular files that are marked preallocated or append-only.
|
|
*/
|
|
bool
|
|
xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
|
|
{
|
|
/* prealloc/delalloc exists only on regular files */
|
|
if (!S_ISREG(ip->i_d.di_mode))
|
|
return false;
|
|
|
|
/*
|
|
* Zero sized files with no cached pages and delalloc blocks will not
|
|
* have speculative prealloc/delalloc blocks to remove.
|
|
*/
|
|
if (VFS_I(ip)->i_size == 0 &&
|
|
VN_CACHED(VFS_I(ip)) == 0 &&
|
|
ip->i_delayed_blks == 0)
|
|
return false;
|
|
|
|
/* If we haven't read in the extent list, then don't do it now. */
|
|
if (!(ip->i_df.if_flags & XFS_IFEXTENTS))
|
|
return false;
|
|
|
|
/*
|
|
* Do not free real preallocated or append-only files unless the file
|
|
* has delalloc blocks and we are forced to remove them.
|
|
*/
|
|
if (ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
|
|
if (!force || ip->i_delayed_blks == 0)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* This is called by xfs_inactive to free any blocks beyond eof
|
|
* when the link count isn't zero and by xfs_dm_punch_hole() when
|
|
* punching a hole to EOF.
|
|
*/
|
|
int
|
|
xfs_free_eofblocks(
|
|
xfs_mount_t *mp,
|
|
xfs_inode_t *ip,
|
|
bool need_iolock)
|
|
{
|
|
xfs_trans_t *tp;
|
|
int error;
|
|
xfs_fileoff_t end_fsb;
|
|
xfs_fileoff_t last_fsb;
|
|
xfs_filblks_t map_len;
|
|
int nimaps;
|
|
xfs_bmbt_irec_t imap;
|
|
|
|
/*
|
|
* Figure out if there are any blocks beyond the end
|
|
* of the file. If not, then there is nothing to do.
|
|
*/
|
|
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
|
|
last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
|
|
if (last_fsb <= end_fsb)
|
|
return 0;
|
|
map_len = last_fsb - end_fsb;
|
|
|
|
nimaps = 1;
|
|
xfs_ilock(ip, XFS_ILOCK_SHARED);
|
|
error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
|
|
xfs_iunlock(ip, XFS_ILOCK_SHARED);
|
|
|
|
if (!error && (nimaps != 0) &&
|
|
(imap.br_startblock != HOLESTARTBLOCK ||
|
|
ip->i_delayed_blks)) {
|
|
/*
|
|
* Attach the dquots to the inode up front.
|
|
*/
|
|
error = xfs_qm_dqattach(ip, 0);
|
|
if (error)
|
|
return error;
|
|
|
|
/*
|
|
* There are blocks after the end of file.
|
|
* Free them up now by truncating the file to
|
|
* its current size.
|
|
*/
|
|
tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
|
|
|
|
if (need_iolock) {
|
|
if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
|
|
xfs_trans_cancel(tp, 0);
|
|
return EAGAIN;
|
|
}
|
|
}
|
|
|
|
error = xfs_trans_reserve(tp, 0,
|
|
XFS_ITRUNCATE_LOG_RES(mp),
|
|
0, XFS_TRANS_PERM_LOG_RES,
|
|
XFS_ITRUNCATE_LOG_COUNT);
|
|
if (error) {
|
|
ASSERT(XFS_FORCED_SHUTDOWN(mp));
|
|
xfs_trans_cancel(tp, 0);
|
|
if (need_iolock)
|
|
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
|
|
return error;
|
|
}
|
|
|
|
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
|
xfs_trans_ijoin(tp, ip, 0);
|
|
|
|
/*
|
|
* Do not update the on-disk file size. If we update the
|
|
* on-disk file size and then the system crashes before the
|
|
* contents of the file are flushed to disk then the files
|
|
* may be full of holes (ie NULL files bug).
|
|
*/
|
|
error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK,
|
|
XFS_ISIZE(ip));
|
|
if (error) {
|
|
/*
|
|
* If we get an error at this point we simply don't
|
|
* bother truncating the file.
|
|
*/
|
|
xfs_trans_cancel(tp,
|
|
(XFS_TRANS_RELEASE_LOG_RES |
|
|
XFS_TRANS_ABORT));
|
|
} else {
|
|
error = xfs_trans_commit(tp,
|
|
XFS_TRANS_RELEASE_LOG_RES);
|
|
if (!error)
|
|
xfs_inode_clear_eofblocks_tag(ip);
|
|
}
|
|
|
|
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
|
if (need_iolock)
|
|
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
|
|
}
|
|
return error;
|
|
}
|
|
|
|
/*
|
|
* xfs_alloc_file_space()
|
|
* This routine allocates disk space for the given file.
|
|
*
|
|
* If alloc_type == 0, this request is for an ALLOCSP type
|
|
* request which will change the file size. In this case, no
|
|
* DMAPI event will be generated by the call. A TRUNCATE event
|
|
* will be generated later by xfs_setattr.
|
|
*
|
|
* If alloc_type != 0, this request is for a RESVSP type
|
|
* request, and a DMAPI DM_EVENT_WRITE will be generated if the
|
|
* lower block boundary byte address is less than the file's
|
|
* length.
|
|
*
|
|
* RETURNS:
|
|
* 0 on success
|
|
* errno on error
|
|
*
|
|
*/
|
|
STATIC int
|
|
xfs_alloc_file_space(
|
|
xfs_inode_t *ip,
|
|
xfs_off_t offset,
|
|
xfs_off_t len,
|
|
int alloc_type,
|
|
int attr_flags)
|
|
{
|
|
xfs_mount_t *mp = ip->i_mount;
|
|
xfs_off_t count;
|
|
xfs_filblks_t allocated_fsb;
|
|
xfs_filblks_t allocatesize_fsb;
|
|
xfs_extlen_t extsz, temp;
|
|
xfs_fileoff_t startoffset_fsb;
|
|
xfs_fsblock_t firstfsb;
|
|
int nimaps;
|
|
int quota_flag;
|
|
int rt;
|
|
xfs_trans_t *tp;
|
|
xfs_bmbt_irec_t imaps[1], *imapp;
|
|
xfs_bmap_free_t free_list;
|
|
uint qblocks, resblks, resrtextents;
|
|
int committed;
|
|
int error;
|
|
|
|
trace_xfs_alloc_file_space(ip);
|
|
|
|
if (XFS_FORCED_SHUTDOWN(mp))
|
|
return XFS_ERROR(EIO);
|
|
|
|
error = xfs_qm_dqattach(ip, 0);
|
|
if (error)
|
|
return error;
|
|
|
|
if (len <= 0)
|
|
return XFS_ERROR(EINVAL);
|
|
|
|
rt = XFS_IS_REALTIME_INODE(ip);
|
|
extsz = xfs_get_extsz_hint(ip);
|
|
|
|
count = len;
|
|
imapp = &imaps[0];
|
|
nimaps = 1;
|
|
startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
|
|
allocatesize_fsb = XFS_B_TO_FSB(mp, count);
|
|
|
|
/*
|
|
* Allocate file space until done or until there is an error
|
|
*/
|
|
while (allocatesize_fsb && !error) {
|
|
xfs_fileoff_t s, e;
|
|
|
|
/*
|
|
* Determine space reservations for data/realtime.
|
|
*/
|
|
if (unlikely(extsz)) {
|
|
s = startoffset_fsb;
|
|
do_div(s, extsz);
|
|
s *= extsz;
|
|
e = startoffset_fsb + allocatesize_fsb;
|
|
if ((temp = do_mod(startoffset_fsb, extsz)))
|
|
e += temp;
|
|
if ((temp = do_mod(e, extsz)))
|
|
e += extsz - temp;
|
|
} else {
|
|
s = 0;
|
|
e = allocatesize_fsb;
|
|
}
|
|
|
|
/*
|
|
* The transaction reservation is limited to a 32-bit block
|
|
* count, hence we need to limit the number of blocks we are
|
|
* trying to reserve to avoid an overflow. We can't allocate
|
|
* more than @nimaps extents, and an extent is limited on disk
|
|
* to MAXEXTLEN (21 bits), so use that to enforce the limit.
|
|
*/
|
|
resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps));
|
|
if (unlikely(rt)) {
|
|
resrtextents = qblocks = resblks;
|
|
resrtextents /= mp->m_sb.sb_rextsize;
|
|
resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
|
|
quota_flag = XFS_QMOPT_RES_RTBLKS;
|
|
} else {
|
|
resrtextents = 0;
|
|
resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks);
|
|
quota_flag = XFS_QMOPT_RES_REGBLKS;
|
|
}
|
|
|
|
/*
|
|
* Allocate and setup the transaction.
|
|
*/
|
|
tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
|
|
error = xfs_trans_reserve(tp, resblks,
|
|
XFS_WRITE_LOG_RES(mp), resrtextents,
|
|
XFS_TRANS_PERM_LOG_RES,
|
|
XFS_WRITE_LOG_COUNT);
|
|
/*
|
|
* Check for running out of space
|
|
*/
|
|
if (error) {
|
|
/*
|
|
* Free the transaction structure.
|
|
*/
|
|
ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
|
|
xfs_trans_cancel(tp, 0);
|
|
break;
|
|
}
|
|
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
|
error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks,
|
|
0, quota_flag);
|
|
if (error)
|
|
goto error1;
|
|
|
|
xfs_trans_ijoin(tp, ip, 0);
|
|
|
|
xfs_bmap_init(&free_list, &firstfsb);
|
|
error = xfs_bmapi_write(tp, ip, startoffset_fsb,
|
|
allocatesize_fsb, alloc_type, &firstfsb,
|
|
0, imapp, &nimaps, &free_list);
|
|
if (error) {
|
|
goto error0;
|
|
}
|
|
|
|
/*
|
|
* Complete the transaction
|
|
*/
|
|
error = xfs_bmap_finish(&tp, &free_list, &committed);
|
|
if (error) {
|
|
goto error0;
|
|
}
|
|
|
|
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
|
|
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
|
if (error) {
|
|
break;
|
|
}
|
|
|
|
allocated_fsb = imapp->br_blockcount;
|
|
|
|
if (nimaps == 0) {
|
|
error = XFS_ERROR(ENOSPC);
|
|
break;
|
|
}
|
|
|
|
startoffset_fsb += allocated_fsb;
|
|
allocatesize_fsb -= allocated_fsb;
|
|
}
|
|
|
|
return error;
|
|
|
|
error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
|
|
xfs_bmap_cancel(&free_list);
|
|
xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
|
|
|
|
error1: /* Just cancel transaction */
|
|
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
|
|
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
|
return error;
|
|
}
|
|
|
|
/*
|
|
* Zero file bytes between startoff and endoff inclusive.
|
|
* The iolock is held exclusive and no blocks are buffered.
|
|
*
|
|
* This function is used by xfs_free_file_space() to zero
|
|
* partial blocks when the range to free is not block aligned.
|
|
* When unreserving space with boundaries that are not block
|
|
* aligned we round up the start and round down the end
|
|
* boundaries and then use this function to zero the parts of
|
|
* the blocks that got dropped during the rounding.
|
|
*/
|
|
STATIC int
|
|
xfs_zero_remaining_bytes(
|
|
xfs_inode_t *ip,
|
|
xfs_off_t startoff,
|
|
xfs_off_t endoff)
|
|
{
|
|
xfs_bmbt_irec_t imap;
|
|
xfs_fileoff_t offset_fsb;
|
|
xfs_off_t lastoffset;
|
|
xfs_off_t offset;
|
|
xfs_buf_t *bp;
|
|
xfs_mount_t *mp = ip->i_mount;
|
|
int nimap;
|
|
int error = 0;
|
|
|
|
/*
|
|
* Avoid doing I/O beyond eof - it's not necessary
|
|
* since nothing can read beyond eof. The space will
|
|
* be zeroed when the file is extended anyway.
|
|
*/
|
|
if (startoff >= XFS_ISIZE(ip))
|
|
return 0;
|
|
|
|
if (endoff > XFS_ISIZE(ip))
|
|
endoff = XFS_ISIZE(ip);
|
|
|
|
bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ?
|
|
mp->m_rtdev_targp : mp->m_ddev_targp,
|
|
BTOBB(mp->m_sb.sb_blocksize), 0);
|
|
if (!bp)
|
|
return XFS_ERROR(ENOMEM);
|
|
|
|
xfs_buf_unlock(bp);
|
|
|
|
for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
|
|
offset_fsb = XFS_B_TO_FSBT(mp, offset);
|
|
nimap = 1;
|
|
error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0);
|
|
if (error || nimap < 1)
|
|
break;
|
|
ASSERT(imap.br_blockcount >= 1);
|
|
ASSERT(imap.br_startoff == offset_fsb);
|
|
lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1;
|
|
if (lastoffset > endoff)
|
|
lastoffset = endoff;
|
|
if (imap.br_startblock == HOLESTARTBLOCK)
|
|
continue;
|
|
ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
|
|
if (imap.br_state == XFS_EXT_UNWRITTEN)
|
|
continue;
|
|
XFS_BUF_UNDONE(bp);
|
|
XFS_BUF_UNWRITE(bp);
|
|
XFS_BUF_READ(bp);
|
|
XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
|
|
xfsbdstrat(mp, bp);
|
|
error = xfs_buf_iowait(bp);
|
|
if (error) {
|
|
xfs_buf_ioerror_alert(bp,
|
|
"xfs_zero_remaining_bytes(read)");
|
|
break;
|
|
}
|
|
memset(bp->b_addr +
|
|
(offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
|
|
0, lastoffset - offset + 1);
|
|
XFS_BUF_UNDONE(bp);
|
|
XFS_BUF_UNREAD(bp);
|
|
XFS_BUF_WRITE(bp);
|
|
xfsbdstrat(mp, bp);
|
|
error = xfs_buf_iowait(bp);
|
|
if (error) {
|
|
xfs_buf_ioerror_alert(bp,
|
|
"xfs_zero_remaining_bytes(write)");
|
|
break;
|
|
}
|
|
}
|
|
xfs_buf_free(bp);
|
|
return error;
|
|
}
|
|
|
|
/*
|
|
* xfs_free_file_space()
|
|
* This routine frees disk space for the given file.
|
|
*
|
|
* This routine is only called by xfs_change_file_space
|
|
* for an UNRESVSP type call.
|
|
*
|
|
* RETURNS:
|
|
* 0 on success
|
|
* errno on error
|
|
*
|
|
*/
|
|
STATIC int
|
|
xfs_free_file_space(
|
|
xfs_inode_t *ip,
|
|
xfs_off_t offset,
|
|
xfs_off_t len,
|
|
int attr_flags)
|
|
{
|
|
int committed;
|
|
int done;
|
|
xfs_fileoff_t endoffset_fsb;
|
|
int error;
|
|
xfs_fsblock_t firstfsb;
|
|
xfs_bmap_free_t free_list;
|
|
xfs_bmbt_irec_t imap;
|
|
xfs_off_t ioffset;
|
|
xfs_extlen_t mod=0;
|
|
xfs_mount_t *mp;
|
|
int nimap;
|
|
uint resblks;
|
|
xfs_off_t rounding;
|
|
int rt;
|
|
xfs_fileoff_t startoffset_fsb;
|
|
xfs_trans_t *tp;
|
|
int need_iolock = 1;
|
|
|
|
mp = ip->i_mount;
|
|
|
|
trace_xfs_free_file_space(ip);
|
|
|
|
error = xfs_qm_dqattach(ip, 0);
|
|
if (error)
|
|
return error;
|
|
|
|
error = 0;
|
|
if (len <= 0) /* if nothing being freed */
|
|
return error;
|
|
rt = XFS_IS_REALTIME_INODE(ip);
|
|
startoffset_fsb = XFS_B_TO_FSB(mp, offset);
|
|
endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
|
|
|
|
if (attr_flags & XFS_ATTR_NOLOCK)
|
|
need_iolock = 0;
|
|
if (need_iolock) {
|
|
xfs_ilock(ip, XFS_IOLOCK_EXCL);
|
|
/* wait for the completion of any pending DIOs */
|
|
inode_dio_wait(VFS_I(ip));
|
|
}
|
|
|
|
rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
|
|
ioffset = offset & ~(rounding - 1);
|
|
error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
|
|
ioffset, -1);
|
|
if (error)
|
|
goto out_unlock_iolock;
|
|
truncate_pagecache_range(VFS_I(ip), ioffset, -1);
|
|
|
|
/*
|
|
* Need to zero the stuff we're not freeing, on disk.
|
|
* If it's a realtime file & can't use unwritten extents then we
|
|
* actually need to zero the extent edges. Otherwise xfs_bunmapi
|
|
* will take care of it for us.
|
|
*/
|
|
if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
|
|
nimap = 1;
|
|
error = xfs_bmapi_read(ip, startoffset_fsb, 1,
|
|
&imap, &nimap, 0);
|
|
if (error)
|
|
goto out_unlock_iolock;
|
|
ASSERT(nimap == 0 || nimap == 1);
|
|
if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
|
|
xfs_daddr_t block;
|
|
|
|
ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
|
|
block = imap.br_startblock;
|
|
mod = do_div(block, mp->m_sb.sb_rextsize);
|
|
if (mod)
|
|
startoffset_fsb += mp->m_sb.sb_rextsize - mod;
|
|
}
|
|
nimap = 1;
|
|
error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1,
|
|
&imap, &nimap, 0);
|
|
if (error)
|
|
goto out_unlock_iolock;
|
|
ASSERT(nimap == 0 || nimap == 1);
|
|
if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
|
|
ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
|
|
mod++;
|
|
if (mod && (mod != mp->m_sb.sb_rextsize))
|
|
endoffset_fsb -= mod;
|
|
}
|
|
}
|
|
if ((done = (endoffset_fsb <= startoffset_fsb)))
|
|
/*
|
|
* One contiguous piece to clear
|
|
*/
|
|
error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1);
|
|
else {
|
|
/*
|
|
* Some full blocks, possibly two pieces to clear
|
|
*/
|
|
if (offset < XFS_FSB_TO_B(mp, startoffset_fsb))
|
|
error = xfs_zero_remaining_bytes(ip, offset,
|
|
XFS_FSB_TO_B(mp, startoffset_fsb) - 1);
|
|
if (!error &&
|
|
XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len)
|
|
error = xfs_zero_remaining_bytes(ip,
|
|
XFS_FSB_TO_B(mp, endoffset_fsb),
|
|
offset + len - 1);
|
|
}
|
|
|
|
/*
|
|
* free file space until done or until there is an error
|
|
*/
|
|
resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
|
|
while (!error && !done) {
|
|
|
|
/*
|
|
* allocate and setup the transaction. Allow this
|
|
* transaction to dip into the reserve blocks to ensure
|
|
* the freeing of the space succeeds at ENOSPC.
|
|
*/
|
|
tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
|
|
tp->t_flags |= XFS_TRANS_RESERVE;
|
|
error = xfs_trans_reserve(tp,
|
|
resblks,
|
|
XFS_WRITE_LOG_RES(mp),
|
|
0,
|
|
XFS_TRANS_PERM_LOG_RES,
|
|
XFS_WRITE_LOG_COUNT);
|
|
|
|
/*
|
|
* check for running out of space
|
|
*/
|
|
if (error) {
|
|
/*
|
|
* Free the transaction structure.
|
|
*/
|
|
ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
|
|
xfs_trans_cancel(tp, 0);
|
|
break;
|
|
}
|
|
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
|
error = xfs_trans_reserve_quota(tp, mp,
|
|
ip->i_udquot, ip->i_gdquot, ip->i_pdquot,
|
|
resblks, 0, XFS_QMOPT_RES_REGBLKS);
|
|
if (error)
|
|
goto error1;
|
|
|
|
xfs_trans_ijoin(tp, ip, 0);
|
|
|
|
/*
|
|
* issue the bunmapi() call to free the blocks
|
|
*/
|
|
xfs_bmap_init(&free_list, &firstfsb);
|
|
error = xfs_bunmapi(tp, ip, startoffset_fsb,
|
|
endoffset_fsb - startoffset_fsb,
|
|
0, 2, &firstfsb, &free_list, &done);
|
|
if (error) {
|
|
goto error0;
|
|
}
|
|
|
|
/*
|
|
* complete the transaction
|
|
*/
|
|
error = xfs_bmap_finish(&tp, &free_list, &committed);
|
|
if (error) {
|
|
goto error0;
|
|
}
|
|
|
|
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
|
|
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
|
}
|
|
|
|
out_unlock_iolock:
|
|
if (need_iolock)
|
|
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
|
|
return error;
|
|
|
|
error0:
|
|
xfs_bmap_cancel(&free_list);
|
|
error1:
|
|
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
|
|
xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) :
|
|
XFS_ILOCK_EXCL);
|
|
return error;
|
|
}
|
|
|
|
|
|
STATIC int
|
|
xfs_zero_file_space(
|
|
struct xfs_inode *ip,
|
|
xfs_off_t offset,
|
|
xfs_off_t len,
|
|
int attr_flags)
|
|
{
|
|
struct xfs_mount *mp = ip->i_mount;
|
|
uint granularity;
|
|
xfs_off_t start_boundary;
|
|
xfs_off_t end_boundary;
|
|
int error;
|
|
|
|
granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
|
|
|
|
/*
|
|
* Round the range of extents we are going to convert inwards. If the
|
|
* offset is aligned, then it doesn't get changed so we zero from the
|
|
* start of the block offset points to.
|
|
*/
|
|
start_boundary = round_up(offset, granularity);
|
|
end_boundary = round_down(offset + len, granularity);
|
|
|
|
ASSERT(start_boundary >= offset);
|
|
ASSERT(end_boundary <= offset + len);
|
|
|
|
if (!(attr_flags & XFS_ATTR_NOLOCK))
|
|
xfs_ilock(ip, XFS_IOLOCK_EXCL);
|
|
|
|
if (start_boundary < end_boundary - 1) {
|
|
/* punch out the page cache over the conversion range */
|
|
truncate_pagecache_range(VFS_I(ip), start_boundary,
|
|
end_boundary - 1);
|
|
/* convert the blocks */
|
|
error = xfs_alloc_file_space(ip, start_boundary,
|
|
end_boundary - start_boundary - 1,
|
|
XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT,
|
|
attr_flags);
|
|
if (error)
|
|
goto out_unlock;
|
|
|
|
/* We've handled the interior of the range, now for the edges */
|
|
if (start_boundary != offset)
|
|
error = xfs_iozero(ip, offset, start_boundary - offset);
|
|
if (error)
|
|
goto out_unlock;
|
|
|
|
if (end_boundary != offset + len)
|
|
error = xfs_iozero(ip, end_boundary,
|
|
offset + len - end_boundary);
|
|
|
|
} else {
|
|
/*
|
|
* It's either a sub-granularity range or the range spanned lies
|
|
* partially across two adjacent blocks.
|
|
*/
|
|
error = xfs_iozero(ip, offset, len);
|
|
}
|
|
|
|
out_unlock:
|
|
if (!(attr_flags & XFS_ATTR_NOLOCK))
|
|
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
|
|
return error;
|
|
|
|
}
|
|
|
|
/*
|
|
* xfs_change_file_space()
|
|
* This routine allocates or frees disk space for the given file.
|
|
* The user specified parameters are checked for alignment and size
|
|
* limitations.
|
|
*
|
|
* RETURNS:
|
|
* 0 on success
|
|
* errno on error
|
|
*
|
|
*/
|
|
int
|
|
xfs_change_file_space(
|
|
xfs_inode_t *ip,
|
|
int cmd,
|
|
xfs_flock64_t *bf,
|
|
xfs_off_t offset,
|
|
int attr_flags)
|
|
{
|
|
xfs_mount_t *mp = ip->i_mount;
|
|
int clrprealloc;
|
|
int error;
|
|
xfs_fsize_t fsize;
|
|
int setprealloc;
|
|
xfs_off_t startoffset;
|
|
xfs_trans_t *tp;
|
|
struct iattr iattr;
|
|
|
|
if (!S_ISREG(ip->i_d.di_mode))
|
|
return XFS_ERROR(EINVAL);
|
|
|
|
switch (bf->l_whence) {
|
|
case 0: /*SEEK_SET*/
|
|
break;
|
|
case 1: /*SEEK_CUR*/
|
|
bf->l_start += offset;
|
|
break;
|
|
case 2: /*SEEK_END*/
|
|
bf->l_start += XFS_ISIZE(ip);
|
|
break;
|
|
default:
|
|
return XFS_ERROR(EINVAL);
|
|
}
|
|
|
|
/*
|
|
* length of <= 0 for resv/unresv/zero is invalid. length for
|
|
* alloc/free is ignored completely and we have no idea what userspace
|
|
* might have set it to, so set it to zero to allow range
|
|
* checks to pass.
|
|
*/
|
|
switch (cmd) {
|
|
case XFS_IOC_ZERO_RANGE:
|
|
case XFS_IOC_RESVSP:
|
|
case XFS_IOC_RESVSP64:
|
|
case XFS_IOC_UNRESVSP:
|
|
case XFS_IOC_UNRESVSP64:
|
|
if (bf->l_len <= 0)
|
|
return XFS_ERROR(EINVAL);
|
|
break;
|
|
default:
|
|
bf->l_len = 0;
|
|
break;
|
|
}
|
|
|
|
if (bf->l_start < 0 ||
|
|
bf->l_start > mp->m_super->s_maxbytes ||
|
|
bf->l_start + bf->l_len < 0 ||
|
|
bf->l_start + bf->l_len >= mp->m_super->s_maxbytes)
|
|
return XFS_ERROR(EINVAL);
|
|
|
|
bf->l_whence = 0;
|
|
|
|
startoffset = bf->l_start;
|
|
fsize = XFS_ISIZE(ip);
|
|
|
|
setprealloc = clrprealloc = 0;
|
|
switch (cmd) {
|
|
case XFS_IOC_ZERO_RANGE:
|
|
error = xfs_zero_file_space(ip, startoffset, bf->l_len,
|
|
attr_flags);
|
|
if (error)
|
|
return error;
|
|
setprealloc = 1;
|
|
break;
|
|
|
|
case XFS_IOC_RESVSP:
|
|
case XFS_IOC_RESVSP64:
|
|
error = xfs_alloc_file_space(ip, startoffset, bf->l_len,
|
|
XFS_BMAPI_PREALLOC, attr_flags);
|
|
if (error)
|
|
return error;
|
|
setprealloc = 1;
|
|
break;
|
|
|
|
case XFS_IOC_UNRESVSP:
|
|
case XFS_IOC_UNRESVSP64:
|
|
if ((error = xfs_free_file_space(ip, startoffset, bf->l_len,
|
|
attr_flags)))
|
|
return error;
|
|
break;
|
|
|
|
case XFS_IOC_ALLOCSP:
|
|
case XFS_IOC_ALLOCSP64:
|
|
case XFS_IOC_FREESP:
|
|
case XFS_IOC_FREESP64:
|
|
/*
|
|
* These operations actually do IO when extending the file, but
|
|
* the allocation is done seperately to the zeroing that is
|
|
* done. This set of operations need to be serialised against
|
|
* other IO operations, such as truncate and buffered IO. We
|
|
* need to take the IOLOCK here to serialise the allocation and
|
|
* zeroing IO to prevent other IOLOCK holders (e.g. getbmap,
|
|
* truncate, direct IO) from racing against the transient
|
|
* allocated but not written state we can have here.
|
|
*/
|
|
xfs_ilock(ip, XFS_IOLOCK_EXCL);
|
|
if (startoffset > fsize) {
|
|
error = xfs_alloc_file_space(ip, fsize,
|
|
startoffset - fsize, 0,
|
|
attr_flags | XFS_ATTR_NOLOCK);
|
|
if (error) {
|
|
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
|
|
break;
|
|
}
|
|
}
|
|
|
|
iattr.ia_valid = ATTR_SIZE;
|
|
iattr.ia_size = startoffset;
|
|
|
|
error = xfs_setattr_size(ip, &iattr,
|
|
attr_flags | XFS_ATTR_NOLOCK);
|
|
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
|
|
|
|
if (error)
|
|
return error;
|
|
|
|
clrprealloc = 1;
|
|
break;
|
|
|
|
default:
|
|
ASSERT(0);
|
|
return XFS_ERROR(EINVAL);
|
|
}
|
|
|
|
/*
|
|
* update the inode timestamp, mode, and prealloc flag bits
|
|
*/
|
|
tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);
|
|
|
|
if ((error = xfs_trans_reserve(tp, 0, XFS_WRITEID_LOG_RES(mp),
|
|
0, 0, 0))) {
|
|
/* ASSERT(0); */
|
|
xfs_trans_cancel(tp, 0);
|
|
return error;
|
|
}
|
|
|
|
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
|
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
|
|
|
|
if ((attr_flags & XFS_ATTR_DMI) == 0) {
|
|
ip->i_d.di_mode &= ~S_ISUID;
|
|
|
|
/*
|
|
* Note that we don't have to worry about mandatory
|
|
* file locking being disabled here because we only
|
|
* clear the S_ISGID bit if the Group execute bit is
|
|
* on, but if it was on then mandatory locking wouldn't
|
|
* have been enabled.
|
|
*/
|
|
if (ip->i_d.di_mode & S_IXGRP)
|
|
ip->i_d.di_mode &= ~S_ISGID;
|
|
|
|
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
|
|
}
|
|
if (setprealloc)
|
|
ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
|
|
else if (clrprealloc)
|
|
ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;
|
|
|
|
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
|
|
if (attr_flags & XFS_ATTR_SYNC)
|
|
xfs_trans_set_sync(tp);
|
|
return xfs_trans_commit(tp, 0);
|
|
}
|