forked from Minki/linux
Updates for 4.14:
- Write unmount record for a ro mount to avoid unnecessary log replay - Clean up orphaned inodes when mounting fs readonly - Resubmit inode log items when buffer writeback fails to avoid umount hang - Fix log recovery corruption problems when log headers wrap around the end - Avoid infinite loop searching for free inodes when inode counters are wrong - Evict inodes involved with log redo so that we don't leak them later - Fix a potential race between reclaim and inode cluster freeing - Refactor the inode joining code w.r.t. transaction rolling & deferred ops - Fix a bug where the log doesn't properly deal with dirty buffers that are about to become ordered buffers - Fix the extent swap code to deal with making dirty buffers ordered properly - Consolidate page fault handlers - Refactor the incore extent manipulation functions to use the iext abstractions instead of directly modifying with extent data - Disable crashy chattr +/-x until we fix it - Don't allow us to set S_DAX for v2 inodes - Various cleanups - Clarify some documentation - Fix a problem where fsync and a log commit race to send the disk a flush command, resulting in a small window where power fail data loss could occur - Simplify some rmap operations in the fcollapse code - Fix some use-after-free problems in async writeback -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQIcBAABCgAGBQJZrEAQAAoJEPh/dxk0SrTrxKEP/3y8sLWdy4fUdPpVkwZteXwc zGyYaLrmKRc5i6abBNtLCZoRJGfRdvVyPrhQ1q3mt8H//xuURgqgFFyjj3wAdsLf sDejIHhdsc8/VcuLLtCW3rEYg58hJ89hW7d1InCP0tvqWmljh9svhzXebtwUvNNF /2fHIUXUiAxLbgjv/N2i/smlLl0zdx6C2x1TlJmfwer0UMTAnlmbFWxCqmtUZwSl QSuGgn1wo3dkId9aFoNwQmSCFeYcxQlpaInJEzUiVQOA4dbphXHO9Bsx0eOkpDuz 39waaX0fld8LEfIQGmUQ995UkAwfk/asjgDSApyXdkMayNWhi0KpRl1zXgCb8BbL m7vYJhIfJ399+jbNPe1+htn3I16AmpvAai9MNJidFclWwqFEuQEnxZccdtTIAiRv XuYiq9hN2NOwlwPUYfrZxfx34fdocRyHmGVs3i7P3/qPWd5Hx6+FpQTOngciS7MN 6xnM8PbnrLadw3ooMDEKgWsN805BQALiwzDRggoAXG1Pm2SqFnLD/dAR4c7R3nR8 
vvYlfGHnd38aMlW73IALkkGJqZy/bHPFhrbvpjXyIG6SYwCjrWrO0chM0O8MCRrF MIW3rM5hYIE8aCkpJ2mxvcQalmSAlSPVKlmgvSK4S1Sz4kcywxskNhch8uNkb5uy WUHhrJz+wBjdjrDOU3aL =jBdo -----END PGP SIGNATURE----- Merge tag 'xfs-4.14-merge-7' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux Pull XFS updates from Darrick Wong: "Here are the changes for xfs for 4.14. Most of these are cleanups and fixes for bad behavior, as we're mostly focusing on improving reliability this cycle (read: there's potentially a lot of stuff on the horizon for 4.15 so better to spend a few weeks killing other bugs now). Summary: - Write unmount record for a ro mount to avoid unnecessary log replay - Clean up orphaned inodes when mounting fs readonly - Resubmit inode log items when buffer writeback fails to avoid umount hang - Fix log recovery corruption problems when log headers wrap around the end - Avoid infinite loop searching for free inodes when inode counters are wrong - Evict inodes involved with log redo so that we don't leak them later - Fix a potential race between reclaim and inode cluster freeing - Refactor the inode joining code w.r.t. 
transaction rolling & deferred ops - Fix a bug where the log doesn't properly deal with dirty buffers that are about to become ordered buffers - Fix the extent swap code to deal with making dirty buffers ordered properly - Consolidate page fault handlers - Refactor the incore extent manipulation functions to use the iext abstractions instead of directly modifying with extent data - Disable crashy chattr +/-x until we fix it - Don't allow us to set S_DAX for v2 inodes - Various cleanups - Clarify some documentation - Fix a problem where fsync and a log commit race to send the disk a flush command, resulting in a small window where power fail data loss could occur - Simplify some rmap operations in the fcollapse code - Fix some use-after-free problems in async writeback" * tag 'xfs-4.14-merge-7' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (44 commits) xfs: use kmem_free to free return value of kmem_zalloc xfs: open code end_buffer_async_write in xfs_finish_page_writeback xfs: don't set v3 xflags for v2 inodes xfs: fix compiler warnings fsmap: fix documentation of FMR_OF_LAST xfs: simplify the rmap code in xfs_bmse_merge xfs: remove unused flags arg from xfs_file_iomap_begin_delay xfs: fix incorrect log_flushed on fsync xfs: disable per-inode DAX flag xfs: replace xfs_qm_get_rtblks with a direct call to xfs_bmap_count_leaves xfs: rewrite xfs_bmap_count_leaves using xfs_iext_get_extent xfs: use xfs_iext_*_extent helpers in xfs_bmap_split_extent_at xfs: use xfs_iext_*_extent helpers in xfs_bmap_shift_extents xfs: move some code around inside xfs_bmap_shift_extents xfs: use xfs_iext_get_extent in xfs_bmap_first_unused xfs: switch xfs_bmap_local_to_extents to use xfs_iext_insert xfs: add a xfs_iext_update_extent helper xfs: consolidate the various page fault handlers iomap: return VM_FAULT_* codes from iomap_page_mkwrite xfs: relog dirty buffers during swapext bmbt owner change ...
This commit is contained in:
commit
5791577963
@ -637,6 +637,7 @@ again:
|
||||
|
||||
dispose_list(&dispose);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(evict_inodes);
|
||||
|
||||
/**
|
||||
* invalidate_inodes - attempt to free all inodes on a superblock
|
||||
|
@ -132,7 +132,6 @@ static inline bool atime_needs_update_rcu(const struct path *path,
|
||||
extern void inode_io_list_del(struct inode *inode);
|
||||
|
||||
extern long get_nr_dirty_inodes(void);
|
||||
extern void evict_inodes(struct super_block *);
|
||||
extern int invalidate_inodes(struct super_block *, bool);
|
||||
|
||||
/*
|
||||
|
@ -477,10 +477,10 @@ int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
|
||||
|
||||
set_page_dirty(page);
|
||||
wait_for_stable_page(page);
|
||||
return 0;
|
||||
return VM_FAULT_LOCKED;
|
||||
out_unlock:
|
||||
unlock_page(page);
|
||||
return ret;
|
||||
return block_page_mkwrite_return(ret);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
|
||||
|
||||
|
@ -328,20 +328,19 @@ xfs_attr_set(
|
||||
*/
|
||||
xfs_defer_init(args.dfops, args.firstblock);
|
||||
error = xfs_attr_shortform_to_leaf(&args);
|
||||
if (!error)
|
||||
error = xfs_defer_finish(&args.trans, args.dfops, dp);
|
||||
if (error) {
|
||||
args.trans = NULL;
|
||||
xfs_defer_cancel(&dfops);
|
||||
goto out;
|
||||
}
|
||||
if (error)
|
||||
goto out_defer_cancel;
|
||||
xfs_defer_ijoin(args.dfops, dp);
|
||||
error = xfs_defer_finish(&args.trans, args.dfops);
|
||||
if (error)
|
||||
goto out_defer_cancel;
|
||||
|
||||
/*
|
||||
* Commit the leaf transformation. We'll need another (linked)
|
||||
* transaction to add the new attribute to the leaf.
|
||||
*/
|
||||
|
||||
error = xfs_trans_roll(&args.trans, dp);
|
||||
error = xfs_trans_roll_inode(&args.trans, dp);
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
@ -373,6 +372,9 @@ xfs_attr_set(
|
||||
|
||||
return error;
|
||||
|
||||
out_defer_cancel:
|
||||
xfs_defer_cancel(&dfops);
|
||||
args.trans = NULL;
|
||||
out:
|
||||
if (args.trans)
|
||||
xfs_trans_cancel(args.trans);
|
||||
@ -593,19 +595,18 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
|
||||
*/
|
||||
xfs_defer_init(args->dfops, args->firstblock);
|
||||
error = xfs_attr3_leaf_to_node(args);
|
||||
if (!error)
|
||||
error = xfs_defer_finish(&args->trans, args->dfops, dp);
|
||||
if (error) {
|
||||
args->trans = NULL;
|
||||
xfs_defer_cancel(args->dfops);
|
||||
return error;
|
||||
}
|
||||
if (error)
|
||||
goto out_defer_cancel;
|
||||
xfs_defer_ijoin(args->dfops, dp);
|
||||
error = xfs_defer_finish(&args->trans, args->dfops);
|
||||
if (error)
|
||||
goto out_defer_cancel;
|
||||
|
||||
/*
|
||||
* Commit the current trans (including the inode) and start
|
||||
* a new one.
|
||||
*/
|
||||
error = xfs_trans_roll(&args->trans, dp);
|
||||
error = xfs_trans_roll_inode(&args->trans, dp);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
@ -620,7 +621,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
|
||||
* Commit the transaction that added the attr name so that
|
||||
* later routines can manage their own transactions.
|
||||
*/
|
||||
error = xfs_trans_roll(&args->trans, dp);
|
||||
error = xfs_trans_roll_inode(&args->trans, dp);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
@ -684,20 +685,18 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
|
||||
xfs_defer_init(args->dfops, args->firstblock);
|
||||
error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
|
||||
/* bp is gone due to xfs_da_shrink_inode */
|
||||
if (!error)
|
||||
error = xfs_defer_finish(&args->trans,
|
||||
args->dfops, dp);
|
||||
if (error) {
|
||||
args->trans = NULL;
|
||||
xfs_defer_cancel(args->dfops);
|
||||
return error;
|
||||
}
|
||||
if (error)
|
||||
goto out_defer_cancel;
|
||||
xfs_defer_ijoin(args->dfops, dp);
|
||||
error = xfs_defer_finish(&args->trans, args->dfops);
|
||||
if (error)
|
||||
goto out_defer_cancel;
|
||||
}
|
||||
|
||||
/*
|
||||
* Commit the remove and start the next trans in series.
|
||||
*/
|
||||
error = xfs_trans_roll(&args->trans, dp);
|
||||
error = xfs_trans_roll_inode(&args->trans, dp);
|
||||
|
||||
} else if (args->rmtblkno > 0) {
|
||||
/*
|
||||
@ -706,6 +705,10 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
|
||||
error = xfs_attr3_leaf_clearflag(args);
|
||||
}
|
||||
return error;
|
||||
out_defer_cancel:
|
||||
xfs_defer_cancel(args->dfops);
|
||||
args->trans = NULL;
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -747,15 +750,18 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
|
||||
xfs_defer_init(args->dfops, args->firstblock);
|
||||
error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
|
||||
/* bp is gone due to xfs_da_shrink_inode */
|
||||
if (!error)
|
||||
error = xfs_defer_finish(&args->trans, args->dfops, dp);
|
||||
if (error) {
|
||||
args->trans = NULL;
|
||||
xfs_defer_cancel(args->dfops);
|
||||
return error;
|
||||
}
|
||||
if (error)
|
||||
goto out_defer_cancel;
|
||||
xfs_defer_ijoin(args->dfops, dp);
|
||||
error = xfs_defer_finish(&args->trans, args->dfops);
|
||||
if (error)
|
||||
goto out_defer_cancel;
|
||||
}
|
||||
return 0;
|
||||
out_defer_cancel:
|
||||
xfs_defer_cancel(args->dfops);
|
||||
args->trans = NULL;
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -872,20 +878,18 @@ restart:
|
||||
state = NULL;
|
||||
xfs_defer_init(args->dfops, args->firstblock);
|
||||
error = xfs_attr3_leaf_to_node(args);
|
||||
if (!error)
|
||||
error = xfs_defer_finish(&args->trans,
|
||||
args->dfops, dp);
|
||||
if (error) {
|
||||
args->trans = NULL;
|
||||
xfs_defer_cancel(args->dfops);
|
||||
goto out;
|
||||
}
|
||||
if (error)
|
||||
goto out_defer_cancel;
|
||||
xfs_defer_ijoin(args->dfops, dp);
|
||||
error = xfs_defer_finish(&args->trans, args->dfops);
|
||||
if (error)
|
||||
goto out_defer_cancel;
|
||||
|
||||
/*
|
||||
* Commit the node conversion and start the next
|
||||
* trans in the chain.
|
||||
*/
|
||||
error = xfs_trans_roll(&args->trans, dp);
|
||||
error = xfs_trans_roll_inode(&args->trans, dp);
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
@ -900,13 +904,12 @@ restart:
|
||||
*/
|
||||
xfs_defer_init(args->dfops, args->firstblock);
|
||||
error = xfs_da3_split(state);
|
||||
if (!error)
|
||||
error = xfs_defer_finish(&args->trans, args->dfops, dp);
|
||||
if (error) {
|
||||
args->trans = NULL;
|
||||
xfs_defer_cancel(args->dfops);
|
||||
goto out;
|
||||
}
|
||||
if (error)
|
||||
goto out_defer_cancel;
|
||||
xfs_defer_ijoin(args->dfops, dp);
|
||||
error = xfs_defer_finish(&args->trans, args->dfops);
|
||||
if (error)
|
||||
goto out_defer_cancel;
|
||||
} else {
|
||||
/*
|
||||
* Addition succeeded, update Btree hashvals.
|
||||
@ -925,7 +928,7 @@ restart:
|
||||
* Commit the leaf addition or btree split and start the next
|
||||
* trans in the chain.
|
||||
*/
|
||||
error = xfs_trans_roll(&args->trans, dp);
|
||||
error = xfs_trans_roll_inode(&args->trans, dp);
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
@ -999,20 +1002,18 @@ restart:
|
||||
if (retval && (state->path.active > 1)) {
|
||||
xfs_defer_init(args->dfops, args->firstblock);
|
||||
error = xfs_da3_join(state);
|
||||
if (!error)
|
||||
error = xfs_defer_finish(&args->trans,
|
||||
args->dfops, dp);
|
||||
if (error) {
|
||||
args->trans = NULL;
|
||||
xfs_defer_cancel(args->dfops);
|
||||
goto out;
|
||||
}
|
||||
if (error)
|
||||
goto out_defer_cancel;
|
||||
xfs_defer_ijoin(args->dfops, dp);
|
||||
error = xfs_defer_finish(&args->trans, args->dfops);
|
||||
if (error)
|
||||
goto out_defer_cancel;
|
||||
}
|
||||
|
||||
/*
|
||||
* Commit and start the next trans in the chain.
|
||||
*/
|
||||
error = xfs_trans_roll(&args->trans, dp);
|
||||
error = xfs_trans_roll_inode(&args->trans, dp);
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
@ -1032,6 +1033,10 @@ out:
|
||||
if (error)
|
||||
return error;
|
||||
return retval;
|
||||
out_defer_cancel:
|
||||
xfs_defer_cancel(args->dfops);
|
||||
args->trans = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1122,17 +1127,16 @@ xfs_attr_node_removename(xfs_da_args_t *args)
|
||||
if (retval && (state->path.active > 1)) {
|
||||
xfs_defer_init(args->dfops, args->firstblock);
|
||||
error = xfs_da3_join(state);
|
||||
if (!error)
|
||||
error = xfs_defer_finish(&args->trans, args->dfops, dp);
|
||||
if (error) {
|
||||
args->trans = NULL;
|
||||
xfs_defer_cancel(args->dfops);
|
||||
goto out;
|
||||
}
|
||||
if (error)
|
||||
goto out_defer_cancel;
|
||||
xfs_defer_ijoin(args->dfops, dp);
|
||||
error = xfs_defer_finish(&args->trans, args->dfops);
|
||||
if (error)
|
||||
goto out_defer_cancel;
|
||||
/*
|
||||
* Commit the Btree join operation and start a new trans.
|
||||
*/
|
||||
error = xfs_trans_roll(&args->trans, dp);
|
||||
error = xfs_trans_roll_inode(&args->trans, dp);
|
||||
if (error)
|
||||
goto out;
|
||||
}
|
||||
@ -1156,14 +1160,12 @@ xfs_attr_node_removename(xfs_da_args_t *args)
|
||||
xfs_defer_init(args->dfops, args->firstblock);
|
||||
error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
|
||||
/* bp is gone due to xfs_da_shrink_inode */
|
||||
if (!error)
|
||||
error = xfs_defer_finish(&args->trans,
|
||||
args->dfops, dp);
|
||||
if (error) {
|
||||
args->trans = NULL;
|
||||
xfs_defer_cancel(args->dfops);
|
||||
goto out;
|
||||
}
|
||||
if (error)
|
||||
goto out_defer_cancel;
|
||||
xfs_defer_ijoin(args->dfops, dp);
|
||||
error = xfs_defer_finish(&args->trans, args->dfops);
|
||||
if (error)
|
||||
goto out_defer_cancel;
|
||||
} else
|
||||
xfs_trans_brelse(args->trans, bp);
|
||||
}
|
||||
@ -1172,6 +1174,10 @@ xfs_attr_node_removename(xfs_da_args_t *args)
|
||||
out:
|
||||
xfs_da_state_free(state);
|
||||
return error;
|
||||
out_defer_cancel:
|
||||
xfs_defer_cancel(args->dfops);
|
||||
args->trans = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2608,7 +2608,7 @@ xfs_attr3_leaf_clearflag(
|
||||
/*
|
||||
* Commit the flag value change and start the next trans in series.
|
||||
*/
|
||||
return xfs_trans_roll(&args->trans, args->dp);
|
||||
return xfs_trans_roll_inode(&args->trans, args->dp);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2659,7 +2659,7 @@ xfs_attr3_leaf_setflag(
|
||||
/*
|
||||
* Commit the flag value change and start the next trans in series.
|
||||
*/
|
||||
return xfs_trans_roll(&args->trans, args->dp);
|
||||
return xfs_trans_roll_inode(&args->trans, args->dp);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2777,7 +2777,7 @@ xfs_attr3_leaf_flipflags(
|
||||
/*
|
||||
* Commit the flag value change and start the next trans in series.
|
||||
*/
|
||||
error = xfs_trans_roll(&args->trans, args->dp);
|
||||
error = xfs_trans_roll_inode(&args->trans, args->dp);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
@ -467,13 +467,12 @@ xfs_attr_rmtval_set(
|
||||
error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
|
||||
blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock,
|
||||
args->total, &map, &nmap, args->dfops);
|
||||
if (!error)
|
||||
error = xfs_defer_finish(&args->trans, args->dfops, dp);
|
||||
if (error) {
|
||||
args->trans = NULL;
|
||||
xfs_defer_cancel(args->dfops);
|
||||
return error;
|
||||
}
|
||||
if (error)
|
||||
goto out_defer_cancel;
|
||||
xfs_defer_ijoin(args->dfops, dp);
|
||||
error = xfs_defer_finish(&args->trans, args->dfops);
|
||||
if (error)
|
||||
goto out_defer_cancel;
|
||||
|
||||
ASSERT(nmap == 1);
|
||||
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
|
||||
@ -484,7 +483,7 @@ xfs_attr_rmtval_set(
|
||||
/*
|
||||
* Start the next trans in the chain.
|
||||
*/
|
||||
error = xfs_trans_roll(&args->trans, dp);
|
||||
error = xfs_trans_roll_inode(&args->trans, dp);
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
@ -539,6 +538,10 @@ xfs_attr_rmtval_set(
|
||||
}
|
||||
ASSERT(valuelen == 0);
|
||||
return 0;
|
||||
out_defer_cancel:
|
||||
xfs_defer_cancel(args->dfops);
|
||||
args->trans = NULL;
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -609,21 +612,23 @@ xfs_attr_rmtval_remove(
|
||||
error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
|
||||
XFS_BMAPI_ATTRFORK, 1, args->firstblock,
|
||||
args->dfops, &done);
|
||||
if (!error)
|
||||
error = xfs_defer_finish(&args->trans, args->dfops,
|
||||
args->dp);
|
||||
if (error) {
|
||||
args->trans = NULL;
|
||||
xfs_defer_cancel(args->dfops);
|
||||
return error;
|
||||
}
|
||||
if (error)
|
||||
goto out_defer_cancel;
|
||||
xfs_defer_ijoin(args->dfops, args->dp);
|
||||
error = xfs_defer_finish(&args->trans, args->dfops);
|
||||
if (error)
|
||||
goto out_defer_cancel;
|
||||
|
||||
/*
|
||||
* Close out trans and start the next one in the chain.
|
||||
*/
|
||||
error = xfs_trans_roll(&args->trans, args->dp);
|
||||
error = xfs_trans_roll_inode(&args->trans, args->dp);
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
return 0;
|
||||
out_defer_cancel:
|
||||
xfs_defer_cancel(args->dfops);
|
||||
args->trans = NULL;
|
||||
return error;
|
||||
}
|
||||
|
@ -579,7 +579,7 @@ xfs_bmap_validate_ret(
|
||||
|
||||
#else
|
||||
#define xfs_bmap_check_leaf_extents(cur, ip, whichfork) do { } while (0)
|
||||
#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)
|
||||
#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap) do { } while (0)
|
||||
#endif /* DEBUG */
|
||||
|
||||
/*
|
||||
@ -880,7 +880,7 @@ xfs_bmap_local_to_extents(
|
||||
xfs_ifork_t *ifp; /* inode fork pointer */
|
||||
xfs_alloc_arg_t args; /* allocation arguments */
|
||||
xfs_buf_t *bp; /* buffer for extent block */
|
||||
xfs_bmbt_rec_host_t *ep; /* extent record pointer */
|
||||
struct xfs_bmbt_irec rec;
|
||||
|
||||
/*
|
||||
* We don't want to deal with the case of keeping inode data inline yet.
|
||||
@ -943,9 +943,12 @@ xfs_bmap_local_to_extents(
|
||||
xfs_bmap_local_to_extents_empty(ip, whichfork);
|
||||
flags |= XFS_ILOG_CORE;
|
||||
|
||||
xfs_iext_add(ifp, 0, 1);
|
||||
ep = xfs_iext_get_ext(ifp, 0);
|
||||
xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
|
||||
rec.br_startoff = 0;
|
||||
rec.br_startblock = args.fsbno;
|
||||
rec.br_blockcount = 1;
|
||||
rec.br_state = XFS_EXT_NORM;
|
||||
xfs_iext_insert(ip, 0, 1, &rec, 0);
|
||||
|
||||
trace_xfs_bmap_post_update(ip, 0,
|
||||
whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0,
|
||||
_THIS_IP_);
|
||||
@ -1196,7 +1199,7 @@ xfs_bmap_add_attrfork(
|
||||
xfs_log_sb(tp);
|
||||
}
|
||||
|
||||
error = xfs_defer_finish(&tp, &dfops, NULL);
|
||||
error = xfs_defer_finish(&tp, &dfops);
|
||||
if (error)
|
||||
goto bmap_cancel;
|
||||
error = xfs_trans_commit(tp);
|
||||
@ -1356,7 +1359,6 @@ xfs_bmap_first_unused(
|
||||
xfs_fileoff_t lastaddr; /* last block number seen */
|
||||
xfs_fileoff_t lowest; /* lowest useful block */
|
||||
xfs_fileoff_t max; /* starting useful block */
|
||||
xfs_fileoff_t off; /* offset for this block */
|
||||
xfs_extnum_t nextents; /* number of extent entries */
|
||||
|
||||
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
|
||||
@ -1373,16 +1375,19 @@ xfs_bmap_first_unused(
|
||||
lowest = *first_unused;
|
||||
nextents = xfs_iext_count(ifp);
|
||||
for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) {
|
||||
xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
|
||||
off = xfs_bmbt_get_startoff(ep);
|
||||
struct xfs_bmbt_irec got;
|
||||
|
||||
xfs_iext_get_extent(ifp, idx, &got);
|
||||
|
||||
/*
|
||||
* See if the hole before this extent will work.
|
||||
*/
|
||||
if (off >= lowest + len && off - max >= len) {
|
||||
if (got.br_startoff >= lowest + len &&
|
||||
got.br_startoff - max >= len) {
|
||||
*first_unused = max;
|
||||
return 0;
|
||||
}
|
||||
lastaddr = off + xfs_bmbt_get_blockcount(ep);
|
||||
lastaddr = got.br_startoff + got.br_blockcount;
|
||||
max = XFS_FILEOFF_MAX(lastaddr, lowest);
|
||||
}
|
||||
*first_unused = max;
|
||||
@ -4918,7 +4923,7 @@ xfs_bmap_del_extent_delay(
|
||||
da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
|
||||
got->br_blockcount), da_old);
|
||||
got->br_startblock = nullstartblock((int)da_new);
|
||||
xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
|
||||
xfs_iext_update_extent(ifp, *idx, got);
|
||||
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
|
||||
break;
|
||||
case BMAP_RIGHT_CONTIG:
|
||||
@ -4930,7 +4935,7 @@ xfs_bmap_del_extent_delay(
|
||||
da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
|
||||
got->br_blockcount), da_old);
|
||||
got->br_startblock = nullstartblock((int)da_new);
|
||||
xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
|
||||
xfs_iext_update_extent(ifp, *idx, got);
|
||||
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
|
||||
break;
|
||||
case 0:
|
||||
@ -4956,7 +4961,7 @@ xfs_bmap_del_extent_delay(
|
||||
del->br_blockcount);
|
||||
|
||||
got->br_startblock = nullstartblock((int)got_indlen);
|
||||
xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
|
||||
xfs_iext_update_extent(ifp, *idx, got);
|
||||
trace_xfs_bmap_post_update(ip, *idx, 0, _THIS_IP_);
|
||||
|
||||
new.br_startoff = del_endoff;
|
||||
@ -5026,7 +5031,7 @@ xfs_bmap_del_extent_cow(
|
||||
got->br_startoff = del_endoff;
|
||||
got->br_blockcount -= del->br_blockcount;
|
||||
got->br_startblock = del->br_startblock + del->br_blockcount;
|
||||
xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
|
||||
xfs_iext_update_extent(ifp, *idx, got);
|
||||
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
|
||||
break;
|
||||
case BMAP_RIGHT_CONTIG:
|
||||
@ -5035,7 +5040,7 @@ xfs_bmap_del_extent_cow(
|
||||
*/
|
||||
trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
|
||||
got->br_blockcount -= del->br_blockcount;
|
||||
xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
|
||||
xfs_iext_update_extent(ifp, *idx, got);
|
||||
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
|
||||
break;
|
||||
case 0:
|
||||
@ -5044,7 +5049,7 @@ xfs_bmap_del_extent_cow(
|
||||
*/
|
||||
trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
|
||||
got->br_blockcount = del->br_startoff - got->br_startoff;
|
||||
xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
|
||||
xfs_iext_update_extent(ifp, *idx, got);
|
||||
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
|
||||
|
||||
new.br_startoff = del_endoff;
|
||||
@ -5876,32 +5881,26 @@ xfs_bmse_merge(
|
||||
int whichfork,
|
||||
xfs_fileoff_t shift, /* shift fsb */
|
||||
int current_ext, /* idx of gotp */
|
||||
struct xfs_bmbt_rec_host *gotp, /* extent to shift */
|
||||
struct xfs_bmbt_rec_host *leftp, /* preceding extent */
|
||||
struct xfs_bmbt_irec *got, /* extent to shift */
|
||||
struct xfs_bmbt_irec *left, /* preceding extent */
|
||||
struct xfs_btree_cur *cur,
|
||||
int *logflags) /* output */
|
||||
int *logflags, /* output */
|
||||
struct xfs_defer_ops *dfops)
|
||||
{
|
||||
struct xfs_bmbt_irec got;
|
||||
struct xfs_bmbt_irec left;
|
||||
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
|
||||
struct xfs_bmbt_irec new;
|
||||
xfs_filblks_t blockcount;
|
||||
int error, i;
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
|
||||
xfs_bmbt_get_all(gotp, &got);
|
||||
xfs_bmbt_get_all(leftp, &left);
|
||||
blockcount = left.br_blockcount + got.br_blockcount;
|
||||
blockcount = left->br_blockcount + got->br_blockcount;
|
||||
|
||||
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
|
||||
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
|
||||
ASSERT(xfs_bmse_can_merge(&left, &got, shift));
|
||||
ASSERT(xfs_bmse_can_merge(left, got, shift));
|
||||
|
||||
/*
|
||||
* Merge the in-core extents. Note that the host record pointers and
|
||||
* current_ext index are invalid once the extent has been removed via
|
||||
* xfs_iext_remove().
|
||||
*/
|
||||
xfs_bmbt_set_blockcount(leftp, blockcount);
|
||||
xfs_iext_remove(ip, current_ext, 1, 0);
|
||||
new = *left;
|
||||
new.br_blockcount = blockcount;
|
||||
|
||||
/*
|
||||
* Update the on-disk extent count, the btree if necessary and log the
|
||||
@ -5912,12 +5911,12 @@ xfs_bmse_merge(
|
||||
*logflags |= XFS_ILOG_CORE;
|
||||
if (!cur) {
|
||||
*logflags |= XFS_ILOG_DEXT;
|
||||
return 0;
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* lookup and remove the extent to merge */
|
||||
error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock,
|
||||
got.br_blockcount, &i);
|
||||
error = xfs_bmbt_lookup_eq(cur, got->br_startoff, got->br_startblock,
|
||||
got->br_blockcount, &i);
|
||||
if (error)
|
||||
return error;
|
||||
XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
|
||||
@ -5928,16 +5927,28 @@ xfs_bmse_merge(
|
||||
XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
|
||||
|
||||
/* lookup and update size of the previous extent */
|
||||
error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock,
|
||||
left.br_blockcount, &i);
|
||||
error = xfs_bmbt_lookup_eq(cur, left->br_startoff, left->br_startblock,
|
||||
left->br_blockcount, &i);
|
||||
if (error)
|
||||
return error;
|
||||
XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
|
||||
|
||||
left.br_blockcount = blockcount;
|
||||
error = xfs_bmbt_update(cur, new.br_startoff, new.br_startblock,
|
||||
new.br_blockcount, new.br_state);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
return xfs_bmbt_update(cur, left.br_startoff, left.br_startblock,
|
||||
left.br_blockcount, left.br_state);
|
||||
done:
|
||||
xfs_iext_update_extent(ifp, current_ext - 1, &new);
|
||||
xfs_iext_remove(ip, current_ext, 1, 0);
|
||||
|
||||
/* update reverse mapping. rmap functions merge the rmaps for us */
|
||||
error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, got);
|
||||
if (error)
|
||||
return error;
|
||||
memcpy(&new, got, sizeof(new));
|
||||
new.br_startoff = left->br_startoff + left->br_blockcount;
|
||||
return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &new);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -5949,7 +5960,7 @@ xfs_bmse_shift_one(
|
||||
int whichfork,
|
||||
xfs_fileoff_t offset_shift_fsb,
|
||||
int *current_ext,
|
||||
struct xfs_bmbt_rec_host *gotp,
|
||||
struct xfs_bmbt_irec *got,
|
||||
struct xfs_btree_cur *cur,
|
||||
int *logflags,
|
||||
enum shift_direction direction,
|
||||
@ -5958,9 +5969,7 @@ xfs_bmse_shift_one(
|
||||
struct xfs_ifork *ifp;
|
||||
struct xfs_mount *mp;
|
||||
xfs_fileoff_t startoff;
|
||||
struct xfs_bmbt_rec_host *adj_irecp;
|
||||
struct xfs_bmbt_irec got;
|
||||
struct xfs_bmbt_irec adj_irec;
|
||||
struct xfs_bmbt_irec adj_irec, new;
|
||||
int error;
|
||||
int i;
|
||||
int total_extents;
|
||||
@ -5969,13 +5978,11 @@ xfs_bmse_shift_one(
|
||||
ifp = XFS_IFORK_PTR(ip, whichfork);
|
||||
total_extents = xfs_iext_count(ifp);
|
||||
|
||||
xfs_bmbt_get_all(gotp, &got);
|
||||
|
||||
/* delalloc extents should be prevented by caller */
|
||||
XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));
|
||||
XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got->br_startblock));
|
||||
|
||||
if (direction == SHIFT_LEFT) {
|
||||
startoff = got.br_startoff - offset_shift_fsb;
|
||||
startoff = got->br_startoff - offset_shift_fsb;
|
||||
|
||||
/*
|
||||
* Check for merge if we've got an extent to the left,
|
||||
@ -5983,46 +5990,39 @@ xfs_bmse_shift_one(
|
||||
* of the file for the shift.
|
||||
*/
|
||||
if (!*current_ext) {
|
||||
if (got.br_startoff < offset_shift_fsb)
|
||||
if (got->br_startoff < offset_shift_fsb)
|
||||
return -EINVAL;
|
||||
goto update_current_ext;
|
||||
}
|
||||
/*
|
||||
* grab the left extent and check for a large
|
||||
* enough hole.
|
||||
*/
|
||||
adj_irecp = xfs_iext_get_ext(ifp, *current_ext - 1);
|
||||
xfs_bmbt_get_all(adj_irecp, &adj_irec);
|
||||
|
||||
if (startoff <
|
||||
adj_irec.br_startoff + adj_irec.br_blockcount)
|
||||
/*
|
||||
* grab the left extent and check for a large enough hole.
|
||||
*/
|
||||
xfs_iext_get_extent(ifp, *current_ext - 1, &adj_irec);
|
||||
if (startoff < adj_irec.br_startoff + adj_irec.br_blockcount)
|
||||
return -EINVAL;
|
||||
|
||||
/* check whether to merge the extent or shift it down */
|
||||
if (xfs_bmse_can_merge(&adj_irec, &got,
|
||||
offset_shift_fsb)) {
|
||||
error = xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
|
||||
*current_ext, gotp, adj_irecp,
|
||||
cur, logflags);
|
||||
if (error)
|
||||
return error;
|
||||
adj_irec = got;
|
||||
goto update_rmap;
|
||||
if (xfs_bmse_can_merge(&adj_irec, got, offset_shift_fsb)) {
|
||||
return xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
|
||||
*current_ext, got, &adj_irec,
|
||||
cur, logflags, dfops);
|
||||
}
|
||||
} else {
|
||||
startoff = got.br_startoff + offset_shift_fsb;
|
||||
startoff = got->br_startoff + offset_shift_fsb;
|
||||
/* nothing to move if this is the last extent */
|
||||
if (*current_ext >= (total_extents - 1))
|
||||
goto update_current_ext;
|
||||
|
||||
/*
|
||||
* If this is not the last extent in the file, make sure there
|
||||
* is enough room between current extent and next extent for
|
||||
* accommodating the shift.
|
||||
*/
|
||||
adj_irecp = xfs_iext_get_ext(ifp, *current_ext + 1);
|
||||
xfs_bmbt_get_all(adj_irecp, &adj_irec);
|
||||
if (startoff + got.br_blockcount > adj_irec.br_startoff)
|
||||
xfs_iext_get_extent(ifp, *current_ext + 1, &adj_irec);
|
||||
if (startoff + got->br_blockcount > adj_irec.br_startoff)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Unlike a left shift (which involves a hole punch),
|
||||
* a right shift does not modify extent neighbors
|
||||
@ -6030,45 +6030,48 @@ xfs_bmse_shift_one(
|
||||
* in this scenario. Check anyways and warn if we
|
||||
* encounter two extents that could be one.
|
||||
*/
|
||||
if (xfs_bmse_can_merge(&got, &adj_irec, offset_shift_fsb))
|
||||
if (xfs_bmse_can_merge(got, &adj_irec, offset_shift_fsb))
|
||||
WARN_ON_ONCE(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Increment the extent index for the next iteration, update the start
|
||||
* offset of the in-core extent and update the btree if applicable.
|
||||
*/
|
||||
update_current_ext:
|
||||
*logflags |= XFS_ILOG_CORE;
|
||||
|
||||
new = *got;
|
||||
new.br_startoff = startoff;
|
||||
|
||||
if (cur) {
|
||||
error = xfs_bmbt_lookup_eq(cur, got->br_startoff,
|
||||
got->br_startblock, got->br_blockcount, &i);
|
||||
if (error)
|
||||
return error;
|
||||
XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
|
||||
|
||||
error = xfs_bmbt_update(cur, new.br_startoff,
|
||||
new.br_startblock, new.br_blockcount,
|
||||
new.br_state);
|
||||
if (error)
|
||||
return error;
|
||||
} else {
|
||||
*logflags |= XFS_ILOG_DEXT;
|
||||
}
|
||||
|
||||
xfs_iext_update_extent(ifp, *current_ext, &new);
|
||||
|
||||
if (direction == SHIFT_LEFT)
|
||||
(*current_ext)++;
|
||||
else
|
||||
(*current_ext)--;
|
||||
xfs_bmbt_set_startoff(gotp, startoff);
|
||||
*logflags |= XFS_ILOG_CORE;
|
||||
adj_irec = got;
|
||||
if (!cur) {
|
||||
*logflags |= XFS_ILOG_DEXT;
|
||||
goto update_rmap;
|
||||
}
|
||||
|
||||
error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock,
|
||||
got.br_blockcount, &i);
|
||||
if (error)
|
||||
return error;
|
||||
XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
|
||||
|
||||
got.br_startoff = startoff;
|
||||
error = xfs_bmbt_update(cur, got.br_startoff, got.br_startblock,
|
||||
got.br_blockcount, got.br_state);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
update_rmap:
|
||||
/* update reverse mapping */
|
||||
error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, &adj_irec);
|
||||
error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, got);
|
||||
if (error)
|
||||
return error;
|
||||
adj_irec.br_startoff = startoff;
|
||||
return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &adj_irec);
|
||||
return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &new);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -6095,7 +6098,6 @@ xfs_bmap_shift_extents(
|
||||
int num_exts)
|
||||
{
|
||||
struct xfs_btree_cur *cur = NULL;
|
||||
struct xfs_bmbt_rec_host *gotp;
|
||||
struct xfs_bmbt_irec got;
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
struct xfs_ifork *ifp;
|
||||
@ -6122,7 +6124,6 @@ xfs_bmap_shift_extents(
|
||||
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
|
||||
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
|
||||
ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
|
||||
ASSERT(*next_fsb != NULLFSBLOCK || direction == SHIFT_RIGHT);
|
||||
|
||||
ifp = XFS_IFORK_PTR(ip, whichfork);
|
||||
if (!(ifp->if_flags & XFS_IFEXTENTS)) {
|
||||
@ -6154,10 +6155,26 @@ xfs_bmap_shift_extents(
|
||||
* In case of first right shift, we need to initialize next_fsb
|
||||
*/
|
||||
if (*next_fsb == NULLFSBLOCK) {
|
||||
gotp = xfs_iext_get_ext(ifp, total_extents - 1);
|
||||
xfs_bmbt_get_all(gotp, &got);
|
||||
ASSERT(direction == SHIFT_RIGHT);
|
||||
|
||||
current_ext = total_extents - 1;
|
||||
xfs_iext_get_extent(ifp, current_ext, &got);
|
||||
if (stop_fsb > got.br_startoff) {
|
||||
*done = 1;
|
||||
goto del_cursor;
|
||||
}
|
||||
*next_fsb = got.br_startoff;
|
||||
if (stop_fsb > *next_fsb) {
|
||||
} else {
|
||||
/*
|
||||
* Look up the extent index for the fsb where we start shifting. We can
|
||||
* henceforth iterate with current_ext as extent list changes are locked
|
||||
* out via ilock.
|
||||
*
|
||||
* If next_fsb lies in a hole beyond which there are no extents we are
|
||||
* done.
|
||||
*/
|
||||
if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &current_ext,
|
||||
&got)) {
|
||||
*done = 1;
|
||||
goto del_cursor;
|
||||
}
|
||||
@ -6165,37 +6182,26 @@ xfs_bmap_shift_extents(
|
||||
|
||||
/* Lookup the extent index at which we have to stop */
|
||||
if (direction == SHIFT_RIGHT) {
|
||||
gotp = xfs_iext_bno_to_ext(ifp, stop_fsb, &stop_extent);
|
||||
struct xfs_bmbt_irec s;
|
||||
|
||||
xfs_iext_lookup_extent(ip, ifp, stop_fsb, &stop_extent, &s);
|
||||
/* Make stop_extent exclusive of shift range */
|
||||
stop_extent--;
|
||||
} else
|
||||
if (current_ext <= stop_extent) {
|
||||
error = -EIO;
|
||||
goto del_cursor;
|
||||
}
|
||||
} else {
|
||||
stop_extent = total_extents;
|
||||
|
||||
/*
|
||||
* Look up the extent index for the fsb where we start shifting. We can
|
||||
* henceforth iterate with current_ext as extent list changes are locked
|
||||
* out via ilock.
|
||||
*
|
||||
* gotp can be null in 2 cases: 1) if there are no extents or 2)
|
||||
* *next_fsb lies in a hole beyond which there are no extents. Either
|
||||
* way, we are done.
|
||||
*/
|
||||
gotp = xfs_iext_bno_to_ext(ifp, *next_fsb, &current_ext);
|
||||
if (!gotp) {
|
||||
*done = 1;
|
||||
goto del_cursor;
|
||||
}
|
||||
|
||||
/* some sanity checking before we finally start shifting extents */
|
||||
if ((direction == SHIFT_LEFT && current_ext >= stop_extent) ||
|
||||
(direction == SHIFT_RIGHT && current_ext <= stop_extent)) {
|
||||
error = -EIO;
|
||||
goto del_cursor;
|
||||
if (current_ext >= stop_extent) {
|
||||
error = -EIO;
|
||||
goto del_cursor;
|
||||
}
|
||||
}
|
||||
|
||||
while (nexts++ < num_exts) {
|
||||
error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
|
||||
&current_ext, gotp, cur, &logflags,
|
||||
&current_ext, &got, cur, &logflags,
|
||||
direction, dfops);
|
||||
if (error)
|
||||
goto del_cursor;
|
||||
@ -6213,13 +6219,11 @@ xfs_bmap_shift_extents(
|
||||
*next_fsb = NULLFSBLOCK;
|
||||
break;
|
||||
}
|
||||
gotp = xfs_iext_get_ext(ifp, current_ext);
|
||||
xfs_iext_get_extent(ifp, current_ext, &got);
|
||||
}
|
||||
|
||||
if (!*done) {
|
||||
xfs_bmbt_get_all(gotp, &got);
|
||||
if (!*done)
|
||||
*next_fsb = got.br_startoff;
|
||||
}
|
||||
|
||||
del_cursor:
|
||||
if (cur)
|
||||
@ -6248,7 +6252,6 @@ xfs_bmap_split_extent_at(
|
||||
{
|
||||
int whichfork = XFS_DATA_FORK;
|
||||
struct xfs_btree_cur *cur = NULL;
|
||||
struct xfs_bmbt_rec_host *gotp;
|
||||
struct xfs_bmbt_irec got;
|
||||
struct xfs_bmbt_irec new; /* split extent */
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
@ -6280,21 +6283,10 @@ xfs_bmap_split_extent_at(
|
||||
}
|
||||
|
||||
/*
|
||||
* gotp can be null in 2 cases: 1) if there are no extents
|
||||
* or 2) split_fsb lies in a hole beyond which there are
|
||||
* no extents. Either way, we are done.
|
||||
* If there are not extents, or split_fsb lies in a hole we are done.
|
||||
*/
|
||||
gotp = xfs_iext_bno_to_ext(ifp, split_fsb, &current_ext);
|
||||
if (!gotp)
|
||||
return 0;
|
||||
|
||||
xfs_bmbt_get_all(gotp, &got);
|
||||
|
||||
/*
|
||||
* Check split_fsb lies in a hole or the start boundary offset
|
||||
* of the extent.
|
||||
*/
|
||||
if (got.br_startoff >= split_fsb)
|
||||
if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &current_ext, &got) ||
|
||||
got.br_startoff >= split_fsb)
|
||||
return 0;
|
||||
|
||||
gotblkcnt = split_fsb - got.br_startoff;
|
||||
@ -6317,8 +6309,8 @@ xfs_bmap_split_extent_at(
|
||||
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
|
||||
}
|
||||
|
||||
xfs_bmbt_set_blockcount(gotp, gotblkcnt);
|
||||
got.br_blockcount = gotblkcnt;
|
||||
xfs_iext_update_extent(ifp, current_ext, &got);
|
||||
|
||||
logflags = XFS_ILOG_CORE;
|
||||
if (cur) {
|
||||
@ -6402,7 +6394,7 @@ xfs_bmap_split_extent(
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
error = xfs_defer_finish(&tp, &dfops, NULL);
|
||||
error = xfs_defer_finish(&tp, &dfops);
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
@ -6452,7 +6444,7 @@ __xfs_bmap_add(
|
||||
bi->bi_whichfork = whichfork;
|
||||
bi->bi_bmap = *bmap;
|
||||
|
||||
error = xfs_defer_join(dfops, bi->bi_owner);
|
||||
error = xfs_defer_ijoin(dfops, bi->bi_owner);
|
||||
if (error) {
|
||||
kmem_free(bi);
|
||||
return error;
|
||||
|
@ -858,6 +858,7 @@ xfs_bmbt_change_owner(
|
||||
cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork);
|
||||
if (!cur)
|
||||
return -ENOMEM;
|
||||
cur->bc_private.b.flags |= XFS_BTCUR_BPRV_INVALID_OWNER;
|
||||
|
||||
error = xfs_btree_change_owner(cur, new_owner, buffer_list);
|
||||
xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
|
||||
|
@ -1791,6 +1791,7 @@ xfs_btree_lookup_get_block(
|
||||
|
||||
/* Check the inode owner since the verifiers don't. */
|
||||
if (xfs_sb_version_hascrc(&cur->bc_mp->m_sb) &&
|
||||
!(cur->bc_private.b.flags & XFS_BTCUR_BPRV_INVALID_OWNER) &&
|
||||
(cur->bc_flags & XFS_BTREE_LONG_PTRS) &&
|
||||
be64_to_cpu((*blkp)->bb_u.l.bb_owner) !=
|
||||
cur->bc_private.b.ip->i_ino)
|
||||
@ -4451,10 +4452,15 @@ xfs_btree_block_change_owner(
|
||||
|
||||
/* modify the owner */
|
||||
block = xfs_btree_get_block(cur, level, &bp);
|
||||
if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
|
||||
if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
|
||||
if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner))
|
||||
return 0;
|
||||
block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
|
||||
else
|
||||
} else {
|
||||
if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner))
|
||||
return 0;
|
||||
block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the block is a root block hosted in an inode, we might not have a
|
||||
@ -4463,16 +4469,19 @@ xfs_btree_block_change_owner(
|
||||
* block is formatted into the on-disk inode fork. We still change it,
|
||||
* though, so everything is consistent in memory.
|
||||
*/
|
||||
if (bp) {
|
||||
if (cur->bc_tp) {
|
||||
xfs_trans_ordered_buf(cur->bc_tp, bp);
|
||||
xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
|
||||
} else {
|
||||
xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
|
||||
}
|
||||
} else {
|
||||
if (!bp) {
|
||||
ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
|
||||
ASSERT(level == cur->bc_nlevels - 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (cur->bc_tp) {
|
||||
if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) {
|
||||
xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
|
||||
return -EAGAIN;
|
||||
}
|
||||
} else {
|
||||
xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -233,7 +233,8 @@ typedef struct xfs_btree_cur
|
||||
short forksize; /* fork's inode space */
|
||||
char whichfork; /* data or attr fork */
|
||||
char flags; /* flags */
|
||||
#define XFS_BTCUR_BPRV_WASDEL 1 /* was delayed */
|
||||
#define XFS_BTCUR_BPRV_WASDEL (1<<0) /* was delayed */
|
||||
#define XFS_BTCUR_BPRV_INVALID_OWNER (1<<1) /* for ext swap */
|
||||
} b;
|
||||
} bc_private; /* per-btree type data */
|
||||
} xfs_btree_cur_t;
|
||||
|
@ -240,23 +240,19 @@ xfs_defer_trans_abort(
|
||||
STATIC int
|
||||
xfs_defer_trans_roll(
|
||||
struct xfs_trans **tp,
|
||||
struct xfs_defer_ops *dop,
|
||||
struct xfs_inode *ip)
|
||||
struct xfs_defer_ops *dop)
|
||||
{
|
||||
int i;
|
||||
int error;
|
||||
|
||||
/* Log all the joined inodes except the one we passed in. */
|
||||
for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) {
|
||||
if (dop->dop_inodes[i] == ip)
|
||||
continue;
|
||||
/* Log all the joined inodes. */
|
||||
for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
|
||||
xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE);
|
||||
}
|
||||
|
||||
trace_xfs_defer_trans_roll((*tp)->t_mountp, dop);
|
||||
|
||||
/* Roll the transaction. */
|
||||
error = xfs_trans_roll(tp, ip);
|
||||
error = xfs_trans_roll(tp);
|
||||
if (error) {
|
||||
trace_xfs_defer_trans_roll_error((*tp)->t_mountp, dop, error);
|
||||
xfs_defer_trans_abort(*tp, dop, error);
|
||||
@ -264,12 +260,9 @@ xfs_defer_trans_roll(
|
||||
}
|
||||
dop->dop_committed = true;
|
||||
|
||||
/* Rejoin the joined inodes except the one we passed in. */
|
||||
for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) {
|
||||
if (dop->dop_inodes[i] == ip)
|
||||
continue;
|
||||
/* Rejoin the joined inodes. */
|
||||
for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
|
||||
xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
@ -284,11 +277,10 @@ xfs_defer_has_unfinished_work(
|
||||
|
||||
/*
|
||||
* Add this inode to the deferred op. Each joined inode is relogged
|
||||
* each time we roll the transaction, in addition to any inode passed
|
||||
* to xfs_defer_finish().
|
||||
* each time we roll the transaction.
|
||||
*/
|
||||
int
|
||||
xfs_defer_join(
|
||||
xfs_defer_ijoin(
|
||||
struct xfs_defer_ops *dop,
|
||||
struct xfs_inode *ip)
|
||||
{
|
||||
@ -317,8 +309,7 @@ xfs_defer_join(
|
||||
int
|
||||
xfs_defer_finish(
|
||||
struct xfs_trans **tp,
|
||||
struct xfs_defer_ops *dop,
|
||||
struct xfs_inode *ip)
|
||||
struct xfs_defer_ops *dop)
|
||||
{
|
||||
struct xfs_defer_pending *dfp;
|
||||
struct list_head *li;
|
||||
@ -337,7 +328,7 @@ xfs_defer_finish(
|
||||
xfs_defer_intake_work(*tp, dop);
|
||||
|
||||
/* Roll the transaction. */
|
||||
error = xfs_defer_trans_roll(tp, dop, ip);
|
||||
error = xfs_defer_trans_roll(tp, dop);
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
|
@ -72,12 +72,11 @@ struct xfs_defer_ops {
|
||||
|
||||
void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
|
||||
struct list_head *h);
|
||||
int xfs_defer_finish(struct xfs_trans **tp, struct xfs_defer_ops *dop,
|
||||
struct xfs_inode *ip);
|
||||
int xfs_defer_finish(struct xfs_trans **tp, struct xfs_defer_ops *dop);
|
||||
void xfs_defer_cancel(struct xfs_defer_ops *dop);
|
||||
void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp);
|
||||
bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
|
||||
int xfs_defer_join(struct xfs_defer_ops *dop, struct xfs_inode *ip);
|
||||
int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip);
|
||||
|
||||
/* Description of a deferred type. */
|
||||
struct xfs_defer_op_type {
|
||||
|
@ -378,8 +378,6 @@ xfs_ialloc_inode_init(
|
||||
* transaction and pin the log appropriately.
|
||||
*/
|
||||
xfs_trans_ordered_buf(tp, fbuf);
|
||||
xfs_trans_log_buf(tp, fbuf, 0,
|
||||
BBTOB(fbuf->b_length) - 1);
|
||||
}
|
||||
} else {
|
||||
fbuf->b_flags |= XBF_DONE;
|
||||
@ -1133,6 +1131,7 @@ xfs_dialloc_ag_inobt(
|
||||
int error;
|
||||
int offset;
|
||||
int i, j;
|
||||
int searchdistance = 10;
|
||||
|
||||
pag = xfs_perag_get(mp, agno);
|
||||
|
||||
@ -1159,7 +1158,6 @@ xfs_dialloc_ag_inobt(
|
||||
if (pagno == agno) {
|
||||
int doneleft; /* done, to the left */
|
||||
int doneright; /* done, to the right */
|
||||
int searchdistance = 10;
|
||||
|
||||
error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i);
|
||||
if (error)
|
||||
@ -1220,21 +1218,9 @@ xfs_dialloc_ag_inobt(
|
||||
/*
|
||||
* Loop until we find an inode chunk with a free inode.
|
||||
*/
|
||||
while (!doneleft || !doneright) {
|
||||
while (--searchdistance > 0 && (!doneleft || !doneright)) {
|
||||
int useleft; /* using left inode chunk this time */
|
||||
|
||||
if (!--searchdistance) {
|
||||
/*
|
||||
* Not in range - save last search
|
||||
* location and allocate a new inode
|
||||
*/
|
||||
xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
|
||||
pag->pagl_leftrec = trec.ir_startino;
|
||||
pag->pagl_rightrec = rec.ir_startino;
|
||||
pag->pagl_pagino = pagino;
|
||||
goto newino;
|
||||
}
|
||||
|
||||
/* figure out the closer block if both are valid. */
|
||||
if (!doneleft && !doneright) {
|
||||
useleft = pagino -
|
||||
@ -1278,26 +1264,37 @@ xfs_dialloc_ag_inobt(
|
||||
goto error1;
|
||||
}
|
||||
|
||||
/*
|
||||
* We've reached the end of the btree. because
|
||||
* we are only searching a small chunk of the
|
||||
* btree each search, there is obviously free
|
||||
* inodes closer to the parent inode than we
|
||||
* are now. restart the search again.
|
||||
*/
|
||||
pag->pagl_pagino = NULLAGINO;
|
||||
pag->pagl_leftrec = NULLAGINO;
|
||||
pag->pagl_rightrec = NULLAGINO;
|
||||
xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
|
||||
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
|
||||
goto restart_pagno;
|
||||
if (searchdistance <= 0) {
|
||||
/*
|
||||
* Not in range - save last search
|
||||
* location and allocate a new inode
|
||||
*/
|
||||
xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
|
||||
pag->pagl_leftrec = trec.ir_startino;
|
||||
pag->pagl_rightrec = rec.ir_startino;
|
||||
pag->pagl_pagino = pagino;
|
||||
|
||||
} else {
|
||||
/*
|
||||
* We've reached the end of the btree. because
|
||||
* we are only searching a small chunk of the
|
||||
* btree each search, there is obviously free
|
||||
* inodes closer to the parent inode than we
|
||||
* are now. restart the search again.
|
||||
*/
|
||||
pag->pagl_pagino = NULLAGINO;
|
||||
pag->pagl_leftrec = NULLAGINO;
|
||||
pag->pagl_rightrec = NULLAGINO;
|
||||
xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
|
||||
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
|
||||
goto restart_pagno;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* In a different AG from the parent.
|
||||
* See if the most recently allocated block has any free.
|
||||
*/
|
||||
newino:
|
||||
if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
|
||||
error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
|
||||
XFS_LOOKUP_EQ, &i);
|
||||
|
@ -1499,14 +1499,11 @@ xfs_iext_realloc_indirect(
|
||||
xfs_ifork_t *ifp, /* inode fork pointer */
|
||||
int new_size) /* new indirection array size */
|
||||
{
|
||||
int nlists; /* number of irec's (ex lists) */
|
||||
int size; /* current indirection array size */
|
||||
|
||||
ASSERT(ifp->if_flags & XFS_IFEXTIREC);
|
||||
nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
|
||||
size = nlists * sizeof(xfs_ext_irec_t);
|
||||
ASSERT(ifp->if_real_bytes);
|
||||
ASSERT((new_size >= 0) && (new_size != size));
|
||||
ASSERT((new_size >= 0) &&
|
||||
(new_size != ((ifp->if_real_bytes / XFS_IEXT_BUFSZ) *
|
||||
sizeof(xfs_ext_irec_t))));
|
||||
if (new_size == 0) {
|
||||
xfs_iext_destroy(ifp);
|
||||
} else {
|
||||
@ -2023,3 +2020,15 @@ xfs_iext_get_extent(
|
||||
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), gotp);
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
xfs_iext_update_extent(
|
||||
struct xfs_ifork *ifp,
|
||||
xfs_extnum_t idx,
|
||||
struct xfs_bmbt_irec *gotp)
|
||||
{
|
||||
ASSERT(idx >= 0);
|
||||
ASSERT(idx < xfs_iext_count(ifp));
|
||||
|
||||
xfs_bmbt_set_all(xfs_iext_get_ext(ifp, idx), gotp);
|
||||
}
|
||||
|
@ -187,6 +187,8 @@ bool xfs_iext_lookup_extent(struct xfs_inode *ip,
|
||||
xfs_extnum_t *idxp, struct xfs_bmbt_irec *gotp);
|
||||
bool xfs_iext_get_extent(struct xfs_ifork *ifp, xfs_extnum_t idx,
|
||||
struct xfs_bmbt_irec *gotp);
|
||||
void xfs_iext_update_extent(struct xfs_ifork *ifp, xfs_extnum_t idx,
|
||||
struct xfs_bmbt_irec *gotp);
|
||||
|
||||
extern struct kmem_zone *xfs_ifork_zone;
|
||||
|
||||
|
@ -1679,7 +1679,7 @@ xfs_refcount_recover_cow_leftovers(
|
||||
xfs_bmap_add_free(mp, &dfops, fsb,
|
||||
rr->rr_rrec.rc_blockcount, NULL);
|
||||
|
||||
error = xfs_defer_finish(&tp, &dfops, NULL);
|
||||
error = xfs_defer_finish(&tp, &dfops);
|
||||
if (error)
|
||||
goto out_defer;
|
||||
|
||||
|
@ -85,11 +85,11 @@ xfs_find_bdev_for_inode(
|
||||
* associated buffer_heads, paying attention to the start and end offsets that
|
||||
* we need to process on the page.
|
||||
*
|
||||
* Landmine Warning: bh->b_end_io() will call end_page_writeback() on the last
|
||||
* buffer in the IO. Once it does this, it is unsafe to access the bufferhead or
|
||||
* the page at all, as we may be racing with memory reclaim and it can free both
|
||||
* the bufferhead chain and the page as it will see the page as clean and
|
||||
* unused.
|
||||
* Note that we open code the action in end_buffer_async_write here so that we
|
||||
* only have to iterate over the buffers attached to the page once. This is not
|
||||
* only more efficient, but also ensures that we only calls end_page_writeback
|
||||
* at the end of the iteration, and thus avoids the pitfall of having the page
|
||||
* and buffers potentially freed after every call to end_buffer_async_write.
|
||||
*/
|
||||
static void
|
||||
xfs_finish_page_writeback(
|
||||
@ -97,29 +97,44 @@ xfs_finish_page_writeback(
|
||||
struct bio_vec *bvec,
|
||||
int error)
|
||||
{
|
||||
unsigned int end = bvec->bv_offset + bvec->bv_len - 1;
|
||||
struct buffer_head *head, *bh, *next;
|
||||
struct buffer_head *head = page_buffers(bvec->bv_page), *bh = head;
|
||||
bool busy = false;
|
||||
unsigned int off = 0;
|
||||
unsigned int bsize;
|
||||
unsigned long flags;
|
||||
|
||||
ASSERT(bvec->bv_offset < PAGE_SIZE);
|
||||
ASSERT((bvec->bv_offset & (i_blocksize(inode) - 1)) == 0);
|
||||
ASSERT(end < PAGE_SIZE);
|
||||
ASSERT(bvec->bv_offset + bvec->bv_len <= PAGE_SIZE);
|
||||
ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0);
|
||||
|
||||
bh = head = page_buffers(bvec->bv_page);
|
||||
|
||||
bsize = bh->b_size;
|
||||
local_irq_save(flags);
|
||||
bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
|
||||
do {
|
||||
if (off > end)
|
||||
break;
|
||||
next = bh->b_this_page;
|
||||
if (off < bvec->bv_offset)
|
||||
goto next_bh;
|
||||
bh->b_end_io(bh, !error);
|
||||
next_bh:
|
||||
off += bsize;
|
||||
} while ((bh = next) != head);
|
||||
if (off >= bvec->bv_offset &&
|
||||
off < bvec->bv_offset + bvec->bv_len) {
|
||||
ASSERT(buffer_async_write(bh));
|
||||
ASSERT(bh->b_end_io == NULL);
|
||||
|
||||
if (error) {
|
||||
mark_buffer_write_io_error(bh);
|
||||
clear_buffer_uptodate(bh);
|
||||
SetPageError(bvec->bv_page);
|
||||
} else {
|
||||
set_buffer_uptodate(bh);
|
||||
}
|
||||
clear_buffer_async_write(bh);
|
||||
unlock_buffer(bh);
|
||||
} else if (buffer_async_write(bh)) {
|
||||
ASSERT(buffer_locked(bh));
|
||||
busy = true;
|
||||
}
|
||||
off += bh->b_size;
|
||||
} while ((bh = bh->b_this_page) != head);
|
||||
bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
|
||||
local_irq_restore(flags);
|
||||
|
||||
if (!busy)
|
||||
end_page_writeback(bvec->bv_page);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -133,8 +148,10 @@ xfs_destroy_ioend(
|
||||
int error)
|
||||
{
|
||||
struct inode *inode = ioend->io_inode;
|
||||
struct bio *last = ioend->io_bio;
|
||||
struct bio *bio, *next;
|
||||
struct bio *bio = &ioend->io_inline_bio;
|
||||
struct bio *last = ioend->io_bio, *next;
|
||||
u64 start = bio->bi_iter.bi_sector;
|
||||
bool quiet = bio_flagged(bio, BIO_QUIET);
|
||||
|
||||
for (bio = &ioend->io_inline_bio; bio; bio = next) {
|
||||
struct bio_vec *bvec;
|
||||
@ -155,6 +172,11 @@ xfs_destroy_ioend(
|
||||
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
if (unlikely(error && !quiet)) {
|
||||
xfs_err_ratelimited(XFS_I(inode)->i_mount,
|
||||
"writeback error on sector %llu", start);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -423,7 +445,8 @@ xfs_start_buffer_writeback(
|
||||
ASSERT(!buffer_delay(bh));
|
||||
ASSERT(!buffer_unwritten(bh));
|
||||
|
||||
mark_buffer_async_write(bh);
|
||||
bh->b_end_io = NULL;
|
||||
set_buffer_async_write(bh);
|
||||
set_buffer_uptodate(bh);
|
||||
clear_buffer_dirty(bh);
|
||||
}
|
||||
|
@ -97,7 +97,7 @@ xfs_attr3_leaf_freextent(
|
||||
/*
|
||||
* Roll to next transaction.
|
||||
*/
|
||||
error = xfs_trans_roll(trans, dp);
|
||||
error = xfs_trans_roll_inode(trans, dp);
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
@ -308,7 +308,7 @@ xfs_attr3_node_inactive(
|
||||
/*
|
||||
* Atomically commit the whole invalidate stuff.
|
||||
*/
|
||||
error = xfs_trans_roll(trans, dp);
|
||||
error = xfs_trans_roll_inode(trans, dp);
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
@ -375,7 +375,7 @@ xfs_attr3_root_inactive(
|
||||
/*
|
||||
* Commit the invalidate and start the next transaction.
|
||||
*/
|
||||
error = xfs_trans_roll(trans, dp);
|
||||
error = xfs_trans_roll_inode(trans, dp);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
@ -502,7 +502,7 @@ xfs_bui_recover(
|
||||
}
|
||||
|
||||
/* Finish transaction, free inodes. */
|
||||
error = xfs_defer_finish(&tp, &dfops, NULL);
|
||||
error = xfs_defer_finish(&tp, &dfops);
|
||||
if (error)
|
||||
goto err_dfops;
|
||||
|
||||
|
@ -222,22 +222,21 @@ xfs_bmap_eof(
|
||||
* Count leaf blocks given a range of extent records. Delayed allocation
|
||||
* extents are not counted towards the totals.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_extnum_t
|
||||
xfs_bmap_count_leaves(
|
||||
struct xfs_ifork *ifp,
|
||||
xfs_extnum_t *numrecs,
|
||||
xfs_filblks_t *count)
|
||||
{
|
||||
xfs_extnum_t i;
|
||||
xfs_extnum_t nr_exts = xfs_iext_count(ifp);
|
||||
struct xfs_bmbt_irec got;
|
||||
xfs_extnum_t numrecs = 0, i = 0;
|
||||
|
||||
for (i = 0; i < nr_exts; i++) {
|
||||
xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, i);
|
||||
if (!isnullstartblock(xfs_bmbt_get_startblock(frp))) {
|
||||
(*numrecs)++;
|
||||
*count += xfs_bmbt_get_blockcount(frp);
|
||||
while (xfs_iext_get_extent(ifp, i++, &got)) {
|
||||
if (!isnullstartblock(got.br_startblock)) {
|
||||
*count += got.br_blockcount;
|
||||
numrecs++;
|
||||
}
|
||||
}
|
||||
return numrecs;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -370,7 +369,7 @@ xfs_bmap_count_blocks(
|
||||
|
||||
switch (XFS_IFORK_FORMAT(ip, whichfork)) {
|
||||
case XFS_DINODE_FMT_EXTENTS:
|
||||
xfs_bmap_count_leaves(ifp, nextents, count);
|
||||
*nextents = xfs_bmap_count_leaves(ifp, count);
|
||||
return 0;
|
||||
case XFS_DINODE_FMT_BTREE:
|
||||
if (!(ifp->if_flags & XFS_IFEXTENTS)) {
|
||||
@ -1136,7 +1135,7 @@ xfs_alloc_file_space(
|
||||
/*
|
||||
* Complete the transaction
|
||||
*/
|
||||
error = xfs_defer_finish(&tp, &dfops, NULL);
|
||||
error = xfs_defer_finish(&tp, &dfops);
|
||||
if (error)
|
||||
goto error0;
|
||||
|
||||
@ -1202,7 +1201,8 @@ xfs_unmap_extent(
|
||||
if (error)
|
||||
goto out_bmap_cancel;
|
||||
|
||||
error = xfs_defer_finish(&tp, &dfops, ip);
|
||||
xfs_defer_ijoin(&dfops, ip);
|
||||
error = xfs_defer_finish(&tp, &dfops);
|
||||
if (error)
|
||||
goto out_bmap_cancel;
|
||||
|
||||
@ -1496,7 +1496,7 @@ xfs_shift_file_space(
|
||||
if (error)
|
||||
goto out_bmap_cancel;
|
||||
|
||||
error = xfs_defer_finish(&tp, &dfops, NULL);
|
||||
error = xfs_defer_finish(&tp, &dfops);
|
||||
if (error)
|
||||
goto out_bmap_cancel;
|
||||
|
||||
@ -1777,7 +1777,8 @@ xfs_swap_extent_rmap(
|
||||
if (error)
|
||||
goto out_defer;
|
||||
|
||||
error = xfs_defer_finish(tpp, &dfops, ip);
|
||||
xfs_defer_ijoin(&dfops, ip);
|
||||
error = xfs_defer_finish(tpp, &dfops);
|
||||
if (error)
|
||||
goto out_defer;
|
||||
|
||||
@ -1840,29 +1841,18 @@ xfs_swap_extent_forks(
|
||||
}
|
||||
|
||||
/*
|
||||
* Before we've swapped the forks, lets set the owners of the forks
|
||||
* appropriately. We have to do this as we are demand paging the btree
|
||||
* buffers, and so the validation done on read will expect the owner
|
||||
* field to be correctly set. Once we change the owners, we can swap the
|
||||
* inode forks.
|
||||
* Btree format (v3) inodes have the inode number stamped in the bmbt
|
||||
* block headers. We can't start changing the bmbt blocks until the
|
||||
* inode owner change is logged so recovery does the right thing in the
|
||||
* event of a crash. Set the owner change log flags now and leave the
|
||||
* bmbt scan as the last step.
|
||||
*/
|
||||
if (ip->i_d.di_version == 3 &&
|
||||
ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
|
||||
ip->i_d.di_format == XFS_DINODE_FMT_BTREE)
|
||||
(*target_log_flags) |= XFS_ILOG_DOWNER;
|
||||
error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK,
|
||||
tip->i_ino, NULL);
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
|
||||
if (tip->i_d.di_version == 3 &&
|
||||
tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
|
||||
tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
|
||||
(*src_log_flags) |= XFS_ILOG_DOWNER;
|
||||
error = xfs_bmbt_change_owner(tp, tip, XFS_DATA_FORK,
|
||||
ip->i_ino, NULL);
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Swap the data forks of the inodes
|
||||
@ -1940,6 +1930,48 @@ xfs_swap_extent_forks(
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fix up the owners of the bmbt blocks to refer to the current inode. The
|
||||
* change owner scan attempts to order all modified buffers in the current
|
||||
* transaction. In the event of ordered buffer failure, the offending buffer is
|
||||
* physically logged as a fallback and the scan returns -EAGAIN. We must roll
|
||||
* the transaction in this case to replenish the fallback log reservation and
|
||||
* restart the scan. This process repeats until the scan completes.
|
||||
*/
|
||||
static int
|
||||
xfs_swap_change_owner(
|
||||
struct xfs_trans **tpp,
|
||||
struct xfs_inode *ip,
|
||||
struct xfs_inode *tmpip)
|
||||
{
|
||||
int error;
|
||||
struct xfs_trans *tp = *tpp;
|
||||
|
||||
do {
|
||||
error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, ip->i_ino,
|
||||
NULL);
|
||||
/* success or fatal error */
|
||||
if (error != -EAGAIN)
|
||||
break;
|
||||
|
||||
error = xfs_trans_roll(tpp);
|
||||
if (error)
|
||||
break;
|
||||
tp = *tpp;
|
||||
|
||||
/*
|
||||
* Redirty both inodes so they can relog and keep the log tail
|
||||
* moving forward.
|
||||
*/
|
||||
xfs_trans_ijoin(tp, ip, 0);
|
||||
xfs_trans_ijoin(tp, tmpip, 0);
|
||||
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
|
||||
xfs_trans_log_inode(tp, tmpip, XFS_ILOG_CORE);
|
||||
} while (true);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
int
|
||||
xfs_swap_extents(
|
||||
struct xfs_inode *ip, /* target inode */
|
||||
@ -1954,7 +1986,7 @@ xfs_swap_extents(
|
||||
int lock_flags;
|
||||
struct xfs_ifork *cowfp;
|
||||
uint64_t f;
|
||||
int resblks;
|
||||
int resblks = 0;
|
||||
|
||||
/*
|
||||
* Lock the inodes against other IO, page faults and truncate to
|
||||
@ -2002,11 +2034,8 @@ xfs_swap_extents(
|
||||
XFS_SWAP_RMAP_SPACE_RES(mp,
|
||||
XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK),
|
||||
XFS_DATA_FORK);
|
||||
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks,
|
||||
0, 0, &tp);
|
||||
} else
|
||||
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0,
|
||||
0, 0, &tp);
|
||||
}
|
||||
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
|
||||
if (error)
|
||||
goto out_unlock;
|
||||
|
||||
@ -2091,6 +2120,23 @@ xfs_swap_extents(
|
||||
xfs_trans_log_inode(tp, ip, src_log_flags);
|
||||
xfs_trans_log_inode(tp, tip, target_log_flags);
|
||||
|
||||
/*
|
||||
* The extent forks have been swapped, but crc=1,rmapbt=0 filesystems
|
||||
* have inode number owner values in the bmbt blocks that still refer to
|
||||
* the old inode. Scan each bmbt to fix up the owner values with the
|
||||
* inode number of the current inode.
|
||||
*/
|
||||
if (src_log_flags & XFS_ILOG_DOWNER) {
|
||||
error = xfs_swap_change_owner(&tp, ip, tip);
|
||||
if (error)
|
||||
goto out_trans_cancel;
|
||||
}
|
||||
if (target_log_flags & XFS_ILOG_DOWNER) {
|
||||
error = xfs_swap_change_owner(&tp, tip, ip);
|
||||
if (error)
|
||||
goto out_trans_cancel;
|
||||
}
|
||||
|
||||
/*
|
||||
* If this is a synchronous mount, make sure that the
|
||||
* transaction goes to disk before returning to the user.
|
||||
|
@ -70,6 +70,7 @@ int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
|
||||
|
||||
xfs_daddr_t xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb);
|
||||
|
||||
xfs_extnum_t xfs_bmap_count_leaves(struct xfs_ifork *ifp, xfs_filblks_t *count);
|
||||
int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
|
||||
int whichfork, xfs_extnum_t *nextents,
|
||||
xfs_filblks_t *count);
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include "xfs_error.h"
|
||||
#include "xfs_trace.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inode.h"
|
||||
|
||||
|
||||
kmem_zone_t *xfs_buf_item_zone;
|
||||
@ -322,6 +323,8 @@ xfs_buf_item_format(
|
||||
ASSERT((bip->bli_flags & XFS_BLI_STALE) ||
|
||||
(xfs_blft_from_flags(&bip->__bli_format) > XFS_BLFT_UNKNOWN_BUF
|
||||
&& xfs_blft_from_flags(&bip->__bli_format) < XFS_BLFT_MAX_BUF));
|
||||
ASSERT(!(bip->bli_flags & XFS_BLI_ORDERED) ||
|
||||
(bip->bli_flags & XFS_BLI_STALE));
|
||||
|
||||
|
||||
/*
|
||||
@ -346,16 +349,6 @@ xfs_buf_item_format(
|
||||
bip->bli_flags &= ~XFS_BLI_INODE_BUF;
|
||||
}
|
||||
|
||||
if ((bip->bli_flags & (XFS_BLI_ORDERED|XFS_BLI_STALE)) ==
|
||||
XFS_BLI_ORDERED) {
|
||||
/*
|
||||
* The buffer has been logged just to order it. It is not being
|
||||
* included in the transaction commit, so don't format it.
|
||||
*/
|
||||
trace_xfs_buf_item_format_ordered(bip);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < bip->bli_format_count; i++) {
|
||||
xfs_buf_item_format_segment(bip, lv, &vecp, offset,
|
||||
&bip->bli_formats[i]);
|
||||
@ -574,26 +567,20 @@ xfs_buf_item_unlock(
|
||||
{
|
||||
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
|
||||
struct xfs_buf *bp = bip->bli_buf;
|
||||
bool clean;
|
||||
bool aborted;
|
||||
int flags;
|
||||
bool aborted = !!(lip->li_flags & XFS_LI_ABORTED);
|
||||
bool hold = !!(bip->bli_flags & XFS_BLI_HOLD);
|
||||
bool dirty = !!(bip->bli_flags & XFS_BLI_DIRTY);
|
||||
#if defined(DEBUG) || defined(XFS_WARN)
|
||||
bool ordered = !!(bip->bli_flags & XFS_BLI_ORDERED);
|
||||
#endif
|
||||
|
||||
/* Clear the buffer's association with this transaction. */
|
||||
bp->b_transp = NULL;
|
||||
|
||||
/*
|
||||
* If this is a transaction abort, don't return early. Instead, allow
|
||||
* the brelse to happen. Normally it would be done for stale
|
||||
* (cancelled) buffers at unpin time, but we'll never go through the
|
||||
* pin/unpin cycle if we abort inside commit.
|
||||
* The per-transaction state has been copied above so clear it from the
|
||||
* bli.
|
||||
*/
|
||||
aborted = (lip->li_flags & XFS_LI_ABORTED) ? true : false;
|
||||
/*
|
||||
* Before possibly freeing the buf item, copy the per-transaction state
|
||||
* so we can reference it safely later after clearing it from the
|
||||
* buffer log item.
|
||||
*/
|
||||
flags = bip->bli_flags;
|
||||
bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED);
|
||||
|
||||
/*
|
||||
@ -601,7 +588,7 @@ xfs_buf_item_unlock(
|
||||
* unlock the buffer and free the buf item when the buffer is unpinned
|
||||
* for the last time.
|
||||
*/
|
||||
if (flags & XFS_BLI_STALE) {
|
||||
if (bip->bli_flags & XFS_BLI_STALE) {
|
||||
trace_xfs_buf_item_unlock_stale(bip);
|
||||
ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
|
||||
if (!aborted) {
|
||||
@ -619,20 +606,11 @@ xfs_buf_item_unlock(
|
||||
* regardless of whether it is dirty or not. A dirty abort implies a
|
||||
* shutdown, anyway.
|
||||
*
|
||||
* Ordered buffers are dirty but may have no recorded changes, so ensure
|
||||
* we only release clean items here.
|
||||
* The bli dirty state should match whether the blf has logged segments
|
||||
* except for ordered buffers, where only the bli should be dirty.
|
||||
*/
|
||||
clean = (flags & XFS_BLI_DIRTY) ? false : true;
|
||||
if (clean) {
|
||||
int i;
|
||||
for (i = 0; i < bip->bli_format_count; i++) {
|
||||
if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map,
|
||||
bip->bli_formats[i].blf_map_size)) {
|
||||
clean = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
ASSERT((!ordered && dirty == xfs_buf_item_dirty_format(bip)) ||
|
||||
(ordered && dirty && !xfs_buf_item_dirty_format(bip)));
|
||||
|
||||
/*
|
||||
* Clean buffers, by definition, cannot be in the AIL. However, aborted
|
||||
@ -651,11 +629,11 @@ xfs_buf_item_unlock(
|
||||
ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));
|
||||
xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR);
|
||||
xfs_buf_item_relse(bp);
|
||||
} else if (clean)
|
||||
} else if (!dirty)
|
||||
xfs_buf_item_relse(bp);
|
||||
}
|
||||
|
||||
if (!(flags & XFS_BLI_HOLD))
|
||||
if (!hold)
|
||||
xfs_buf_relse(bp);
|
||||
}
|
||||
|
||||
@ -945,14 +923,22 @@ xfs_buf_item_log(
|
||||
|
||||
|
||||
/*
|
||||
* Return 1 if the buffer has been logged or ordered in a transaction (at any
|
||||
* point, not just the current transaction) and 0 if not.
|
||||
* Return true if the buffer has any ranges logged/dirtied by a transaction,
|
||||
* false otherwise.
|
||||
*/
|
||||
uint
|
||||
xfs_buf_item_dirty(
|
||||
xfs_buf_log_item_t *bip)
|
||||
bool
|
||||
xfs_buf_item_dirty_format(
|
||||
struct xfs_buf_log_item *bip)
|
||||
{
|
||||
return (bip->bli_flags & XFS_BLI_DIRTY);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < bip->bli_format_count; i++) {
|
||||
if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map,
|
||||
bip->bli_formats[i].blf_map_size))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
STATIC void
|
||||
@ -1054,6 +1040,31 @@ xfs_buf_do_callbacks(
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Invoke the error state callback for each log item affected by the failed I/O.
|
||||
*
|
||||
* If a metadata buffer write fails with a non-permanent error, the buffer is
|
||||
* eventually resubmitted and so the completion callbacks are not run. The error
|
||||
* state may need to be propagated to the log items attached to the buffer,
|
||||
* however, so the next AIL push of the item knows how to handle it correctly.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_buf_do_callbacks_fail(
|
||||
struct xfs_buf *bp)
|
||||
{
|
||||
struct xfs_log_item *next;
|
||||
struct xfs_log_item *lip = bp->b_fspriv;
|
||||
struct xfs_ail *ailp = lip->li_ailp;
|
||||
|
||||
spin_lock(&ailp->xa_lock);
|
||||
for (; lip; lip = next) {
|
||||
next = lip->li_bio_list;
|
||||
if (lip->li_ops->iop_error)
|
||||
lip->li_ops->iop_error(lip, bp);
|
||||
}
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
}
|
||||
|
||||
static bool
|
||||
xfs_buf_iodone_callback_error(
|
||||
struct xfs_buf *bp)
|
||||
@ -1123,7 +1134,11 @@ xfs_buf_iodone_callback_error(
|
||||
if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount)
|
||||
goto permanent_error;
|
||||
|
||||
/* still a transient error, higher layers will retry */
|
||||
/*
|
||||
* Still a transient error, run IO completion failure callbacks and let
|
||||
* the higher layers retry the buffer.
|
||||
*/
|
||||
xfs_buf_do_callbacks_fail(bp);
|
||||
xfs_buf_ioerror(bp, 0);
|
||||
xfs_buf_relse(bp);
|
||||
return true;
|
||||
@ -1204,3 +1219,31 @@ xfs_buf_iodone(
|
||||
xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
|
||||
xfs_buf_item_free(BUF_ITEM(lip));
|
||||
}
|
||||
|
||||
/*
|
||||
* Requeue a failed buffer for writeback
|
||||
*
|
||||
* Return true if the buffer has been re-queued properly, false otherwise
|
||||
*/
|
||||
bool
|
||||
xfs_buf_resubmit_failed_buffers(
|
||||
struct xfs_buf *bp,
|
||||
struct xfs_log_item *lip,
|
||||
struct list_head *buffer_list)
|
||||
{
|
||||
struct xfs_log_item *next;
|
||||
|
||||
/*
|
||||
* Clear XFS_LI_FAILED flag from all items before resubmit
|
||||
*
|
||||
* XFS_LI_FAILED set/clear is protected by xa_lock; the caller of this
|
||||
* function already has it acquired.
|
||||
*/
|
||||
for (; lip; lip = next) {
|
||||
next = lip->li_bio_list;
|
||||
xfs_clear_li_failed(lip);
|
||||
}
|
||||
|
||||
/* Add this buffer back to the delayed write list */
|
||||
return xfs_buf_delwri_queue(bp, buffer_list);
|
||||
}
|
||||
|
@ -64,12 +64,15 @@ typedef struct xfs_buf_log_item {
|
||||
int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
|
||||
void xfs_buf_item_relse(struct xfs_buf *);
|
||||
void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint);
|
||||
uint xfs_buf_item_dirty(xfs_buf_log_item_t *);
|
||||
bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *);
|
||||
void xfs_buf_attach_iodone(struct xfs_buf *,
|
||||
void(*)(struct xfs_buf *, xfs_log_item_t *),
|
||||
xfs_log_item_t *);
|
||||
void xfs_buf_iodone_callbacks(struct xfs_buf *);
|
||||
void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
|
||||
bool xfs_buf_resubmit_failed_buffers(struct xfs_buf *,
|
||||
struct xfs_log_item *,
|
||||
struct list_head *);
|
||||
|
||||
extern kmem_zone_t *xfs_buf_item_zone;
|
||||
|
||||
|
@ -383,7 +383,7 @@ xfs_qm_dqalloc(
|
||||
|
||||
xfs_trans_bhold(tp, bp);
|
||||
|
||||
error = xfs_defer_finish(tpp, &dfops, NULL);
|
||||
error = xfs_defer_finish(tpp, &dfops);
|
||||
if (error)
|
||||
goto error1;
|
||||
|
||||
|
@ -57,6 +57,7 @@ static unsigned int xfs_errortag_random_default[] = {
|
||||
XFS_RANDOM_AG_RESV_CRITICAL,
|
||||
XFS_RANDOM_DROP_WRITES,
|
||||
XFS_RANDOM_LOG_BAD_CRC,
|
||||
XFS_RANDOM_LOG_ITEM_PIN,
|
||||
};
|
||||
|
||||
struct xfs_errortag_attr {
|
||||
@ -161,6 +162,7 @@ XFS_ERRORTAG_ATTR_RW(bmap_finish_one, XFS_ERRTAG_BMAP_FINISH_ONE);
|
||||
XFS_ERRORTAG_ATTR_RW(ag_resv_critical, XFS_ERRTAG_AG_RESV_CRITICAL);
|
||||
XFS_ERRORTAG_ATTR_RW(drop_writes, XFS_ERRTAG_DROP_WRITES);
|
||||
XFS_ERRORTAG_ATTR_RW(log_bad_crc, XFS_ERRTAG_LOG_BAD_CRC);
|
||||
XFS_ERRORTAG_ATTR_RW(log_item_pin, XFS_ERRTAG_LOG_ITEM_PIN);
|
||||
|
||||
static struct attribute *xfs_errortag_attrs[] = {
|
||||
XFS_ERRORTAG_ATTR_LIST(noerror),
|
||||
@ -193,6 +195,7 @@ static struct attribute *xfs_errortag_attrs[] = {
|
||||
XFS_ERRORTAG_ATTR_LIST(ag_resv_critical),
|
||||
XFS_ERRORTAG_ATTR_LIST(drop_writes),
|
||||
XFS_ERRORTAG_ATTR_LIST(log_bad_crc),
|
||||
XFS_ERRORTAG_ATTR_LIST(log_item_pin),
|
||||
NULL,
|
||||
};
|
||||
|
||||
|
@ -106,7 +106,8 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
|
||||
*/
|
||||
#define XFS_ERRTAG_DROP_WRITES 28
|
||||
#define XFS_ERRTAG_LOG_BAD_CRC 29
|
||||
#define XFS_ERRTAG_MAX 30
|
||||
#define XFS_ERRTAG_LOG_ITEM_PIN 30
|
||||
#define XFS_ERRTAG_MAX 31
|
||||
|
||||
/*
|
||||
* Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
|
||||
@ -141,6 +142,7 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
|
||||
#define XFS_RANDOM_AG_RESV_CRITICAL 4
|
||||
#define XFS_RANDOM_DROP_WRITES 1
|
||||
#define XFS_RANDOM_LOG_BAD_CRC 1
|
||||
#define XFS_RANDOM_LOG_ITEM_PIN 1
|
||||
|
||||
#ifdef DEBUG
|
||||
extern int xfs_errortag_init(struct xfs_mount *mp);
|
||||
|
@ -1011,98 +1011,69 @@ xfs_file_llseek(
|
||||
* page_lock (MM)
|
||||
* i_lock (XFS - extent map serialisation)
|
||||
*/
|
||||
|
||||
/*
|
||||
* mmap()d file has taken write protection fault and is being made writable. We
|
||||
* can set the page state up correctly for a writable page, which means we can
|
||||
* do correct delalloc accounting (ENOSPC checking!) and unwritten extent
|
||||
* mapping.
|
||||
*/
|
||||
STATIC int
|
||||
xfs_filemap_page_mkwrite(
|
||||
struct vm_fault *vmf)
|
||||
{
|
||||
struct inode *inode = file_inode(vmf->vma->vm_file);
|
||||
int ret;
|
||||
|
||||
trace_xfs_filemap_page_mkwrite(XFS_I(inode));
|
||||
|
||||
sb_start_pagefault(inode->i_sb);
|
||||
file_update_time(vmf->vma->vm_file);
|
||||
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
|
||||
|
||||
if (IS_DAX(inode)) {
|
||||
ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
|
||||
} else {
|
||||
ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops);
|
||||
ret = block_page_mkwrite_return(ret);
|
||||
}
|
||||
|
||||
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
|
||||
sb_end_pagefault(inode->i_sb);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
STATIC int
|
||||
xfs_filemap_fault(
|
||||
struct vm_fault *vmf)
|
||||
{
|
||||
struct inode *inode = file_inode(vmf->vma->vm_file);
|
||||
int ret;
|
||||
|
||||
trace_xfs_filemap_fault(XFS_I(inode));
|
||||
|
||||
/* DAX can shortcut the normal fault path on write faults! */
|
||||
if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(inode))
|
||||
return xfs_filemap_page_mkwrite(vmf);
|
||||
|
||||
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
|
||||
if (IS_DAX(inode))
|
||||
ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
|
||||
else
|
||||
ret = filemap_fault(vmf);
|
||||
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Similar to xfs_filemap_fault(), the DAX fault path can call into here on
|
||||
* both read and write faults. Hence we need to handle both cases. There is no
|
||||
* ->huge_mkwrite callout for huge pages, so we have a single function here to
|
||||
* handle both cases here. @flags carries the information on the type of fault
|
||||
* occurring.
|
||||
*/
|
||||
STATIC int
|
||||
xfs_filemap_huge_fault(
|
||||
static int
|
||||
__xfs_filemap_fault(
|
||||
struct vm_fault *vmf,
|
||||
enum page_entry_size pe_size)
|
||||
enum page_entry_size pe_size,
|
||||
bool write_fault)
|
||||
{
|
||||
struct inode *inode = file_inode(vmf->vma->vm_file);
|
||||
struct xfs_inode *ip = XFS_I(inode);
|
||||
int ret;
|
||||
|
||||
if (!IS_DAX(inode))
|
||||
return VM_FAULT_FALLBACK;
|
||||
trace_xfs_filemap_fault(ip, pe_size, write_fault);
|
||||
|
||||
trace_xfs_filemap_huge_fault(ip);
|
||||
|
||||
if (vmf->flags & FAULT_FLAG_WRITE) {
|
||||
if (write_fault) {
|
||||
sb_start_pagefault(inode->i_sb);
|
||||
file_update_time(vmf->vma->vm_file);
|
||||
}
|
||||
|
||||
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
|
||||
ret = dax_iomap_fault(vmf, pe_size, &xfs_iomap_ops);
|
||||
if (IS_DAX(inode)) {
|
||||
ret = dax_iomap_fault(vmf, pe_size, &xfs_iomap_ops);
|
||||
} else {
|
||||
if (write_fault)
|
||||
ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops);
|
||||
else
|
||||
ret = filemap_fault(vmf);
|
||||
}
|
||||
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
|
||||
|
||||
if (vmf->flags & FAULT_FLAG_WRITE)
|
||||
if (write_fault)
|
||||
sb_end_pagefault(inode->i_sb);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
xfs_filemap_fault(
|
||||
struct vm_fault *vmf)
|
||||
{
|
||||
/* DAX can shortcut the normal fault path on write faults! */
|
||||
return __xfs_filemap_fault(vmf, PE_SIZE_PTE,
|
||||
IS_DAX(file_inode(vmf->vma->vm_file)) &&
|
||||
(vmf->flags & FAULT_FLAG_WRITE));
|
||||
}
|
||||
|
||||
static int
|
||||
xfs_filemap_huge_fault(
|
||||
struct vm_fault *vmf,
|
||||
enum page_entry_size pe_size)
|
||||
{
|
||||
if (!IS_DAX(file_inode(vmf->vma->vm_file)))
|
||||
return VM_FAULT_FALLBACK;
|
||||
|
||||
/* DAX can shortcut the normal fault path on write faults! */
|
||||
return __xfs_filemap_fault(vmf, pe_size,
|
||||
(vmf->flags & FAULT_FLAG_WRITE));
|
||||
}
|
||||
|
||||
static int
|
||||
xfs_filemap_page_mkwrite(
|
||||
struct vm_fault *vmf)
|
||||
{
|
||||
return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
|
||||
}
|
||||
|
||||
/*
|
||||
* pfn_mkwrite was originally intended to ensure we capture time stamp
|
||||
* updates on write faults. In reality, it's needed to serialise against
|
||||
|
@ -1124,11 +1124,11 @@ reclaim:
|
||||
* Because we use RCU freeing we need to ensure the inode always appears
|
||||
* to be reclaimed with an invalid inode number when in the free state.
|
||||
* We do this as early as possible under the ILOCK so that
|
||||
* xfs_iflush_cluster() can be guaranteed to detect races with us here.
|
||||
* By doing this, we guarantee that once xfs_iflush_cluster has locked
|
||||
* XFS_ILOCK that it will see either a valid, flushable inode that will
|
||||
* serialise correctly, or it will see a clean (and invalid) inode that
|
||||
* it can skip.
|
||||
* xfs_iflush_cluster() and xfs_ifree_cluster() can be guaranteed to
|
||||
* detect races with us here. By doing this, we guarantee that once
|
||||
* xfs_iflush_cluster() or xfs_ifree_cluster() has locked XFS_ILOCK that
|
||||
* it will see either a valid inode that will serialise correctly, or it
|
||||
* will see an invalid inode that it can skip.
|
||||
*/
|
||||
spin_lock(&ip->i_flags_lock);
|
||||
ip->i_flags = XFS_IRECLAIM;
|
||||
|
@ -1055,7 +1055,7 @@ xfs_dir_ialloc(
|
||||
tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY);
|
||||
}
|
||||
|
||||
code = xfs_trans_roll(&tp, NULL);
|
||||
code = xfs_trans_roll(&tp);
|
||||
if (committed != NULL)
|
||||
*committed = 1;
|
||||
|
||||
@ -1285,7 +1285,7 @@ xfs_create(
|
||||
*/
|
||||
xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
|
||||
|
||||
error = xfs_defer_finish(&tp, &dfops, NULL);
|
||||
error = xfs_defer_finish(&tp, &dfops);
|
||||
if (error)
|
||||
goto out_bmap_cancel;
|
||||
|
||||
@ -1513,7 +1513,7 @@ xfs_link(
|
||||
if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
|
||||
xfs_trans_set_sync(tp);
|
||||
|
||||
error = xfs_defer_finish(&tp, &dfops, NULL);
|
||||
error = xfs_defer_finish(&tp, &dfops);
|
||||
if (error) {
|
||||
xfs_defer_cancel(&dfops);
|
||||
goto error_return;
|
||||
@ -1607,11 +1607,12 @@ xfs_itruncate_extents(
|
||||
* Duplicate the transaction that has the permanent
|
||||
* reservation and commit the old transaction.
|
||||
*/
|
||||
error = xfs_defer_finish(&tp, &dfops, ip);
|
||||
xfs_defer_ijoin(&dfops, ip);
|
||||
error = xfs_defer_finish(&tp, &dfops);
|
||||
if (error)
|
||||
goto out_bmap_cancel;
|
||||
|
||||
error = xfs_trans_roll(&tp, ip);
|
||||
error = xfs_trans_roll_inode(&tp, ip);
|
||||
if (error)
|
||||
goto out;
|
||||
}
|
||||
@ -1855,7 +1856,7 @@ xfs_inactive_ifree(
|
||||
* Just ignore errors at this point. There is nothing we can do except
|
||||
* to try to keep going. Make sure it's not a silent error.
|
||||
*/
|
||||
error = xfs_defer_finish(&tp, &dfops, NULL);
|
||||
error = xfs_defer_finish(&tp, &dfops);
|
||||
if (error) {
|
||||
xfs_notice(mp, "%s: xfs_defer_finish returned error %d",
|
||||
__func__, error);
|
||||
@ -2359,11 +2360,24 @@ retry:
|
||||
* already marked stale. If we can't lock it, back off
|
||||
* and retry.
|
||||
*/
|
||||
if (ip != free_ip &&
|
||||
!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
|
||||
rcu_read_unlock();
|
||||
delay(1);
|
||||
goto retry;
|
||||
if (ip != free_ip) {
|
||||
if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
|
||||
rcu_read_unlock();
|
||||
delay(1);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check the inode number again in case we're
|
||||
* racing with freeing in xfs_reclaim_inode().
|
||||
* See the comments in that function for more
|
||||
* information as to why the initial check is
|
||||
* not sufficient.
|
||||
*/
|
||||
if (ip->i_ino != inum + i) {
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
@ -2637,7 +2651,7 @@ xfs_remove(
|
||||
if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
|
||||
xfs_trans_set_sync(tp);
|
||||
|
||||
error = xfs_defer_finish(&tp, &dfops, NULL);
|
||||
error = xfs_defer_finish(&tp, &dfops);
|
||||
if (error)
|
||||
goto out_bmap_cancel;
|
||||
|
||||
@ -2723,7 +2737,7 @@ xfs_finish_rename(
|
||||
if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
|
||||
xfs_trans_set_sync(tp);
|
||||
|
||||
error = xfs_defer_finish(&tp, dfops, NULL);
|
||||
error = xfs_defer_finish(&tp, dfops);
|
||||
if (error) {
|
||||
xfs_defer_cancel(dfops);
|
||||
xfs_trans_cancel(tp);
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include "xfs_error.h"
|
||||
#include "xfs_trace.h"
|
||||
#include "xfs_trans_priv.h"
|
||||
#include "xfs_buf_item.h"
|
||||
#include "xfs_log.h"
|
||||
|
||||
|
||||
@ -475,6 +476,23 @@ xfs_inode_item_unpin(
|
||||
wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT);
|
||||
}
|
||||
|
||||
/*
|
||||
* Callback used to mark a buffer with XFS_LI_FAILED when items in the buffer
|
||||
* have been failed during writeback
|
||||
*
|
||||
* This informs the AIL that the inode is already flush locked on the next push,
|
||||
* and acquires a hold on the buffer to ensure that it isn't reclaimed before
|
||||
* dirty data makes it to disk.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_inode_item_error(
|
||||
struct xfs_log_item *lip,
|
||||
struct xfs_buf *bp)
|
||||
{
|
||||
ASSERT(xfs_isiflocked(INODE_ITEM(lip)->ili_inode));
|
||||
xfs_set_li_failed(lip, bp);
|
||||
}
|
||||
|
||||
STATIC uint
|
||||
xfs_inode_item_push(
|
||||
struct xfs_log_item *lip,
|
||||
@ -484,13 +502,28 @@ xfs_inode_item_push(
|
||||
{
|
||||
struct xfs_inode_log_item *iip = INODE_ITEM(lip);
|
||||
struct xfs_inode *ip = iip->ili_inode;
|
||||
struct xfs_buf *bp = NULL;
|
||||
struct xfs_buf *bp = lip->li_buf;
|
||||
uint rval = XFS_ITEM_SUCCESS;
|
||||
int error;
|
||||
|
||||
if (xfs_ipincount(ip) > 0)
|
||||
return XFS_ITEM_PINNED;
|
||||
|
||||
/*
|
||||
* The buffer containing this item failed to be written back
|
||||
* previously. Resubmit the buffer for IO.
|
||||
*/
|
||||
if (lip->li_flags & XFS_LI_FAILED) {
|
||||
if (!xfs_buf_trylock(bp))
|
||||
return XFS_ITEM_LOCKED;
|
||||
|
||||
if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list))
|
||||
rval = XFS_ITEM_FLUSHING;
|
||||
|
||||
xfs_buf_unlock(bp);
|
||||
return rval;
|
||||
}
|
||||
|
||||
if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
|
||||
return XFS_ITEM_LOCKED;
|
||||
|
||||
@ -622,7 +655,8 @@ static const struct xfs_item_ops xfs_inode_item_ops = {
|
||||
.iop_unlock = xfs_inode_item_unlock,
|
||||
.iop_committed = xfs_inode_item_committed,
|
||||
.iop_push = xfs_inode_item_push,
|
||||
.iop_committing = xfs_inode_item_committing
|
||||
.iop_committing = xfs_inode_item_committing,
|
||||
.iop_error = xfs_inode_item_error
|
||||
};
|
||||
|
||||
|
||||
@ -710,7 +744,8 @@ xfs_iflush_done(
|
||||
* the AIL lock.
|
||||
*/
|
||||
iip = INODE_ITEM(blip);
|
||||
if (iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn)
|
||||
if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
|
||||
lip->li_flags & XFS_LI_FAILED)
|
||||
need_ail++;
|
||||
|
||||
blip = next;
|
||||
@ -718,7 +753,8 @@ xfs_iflush_done(
|
||||
|
||||
/* make sure we capture the state of the initial inode. */
|
||||
iip = INODE_ITEM(lip);
|
||||
if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn)
|
||||
if ((iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) ||
|
||||
lip->li_flags & XFS_LI_FAILED)
|
||||
need_ail++;
|
||||
|
||||
/*
|
||||
@ -739,6 +775,9 @@ xfs_iflush_done(
|
||||
if (INODE_ITEM(blip)->ili_logged &&
|
||||
blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn)
|
||||
mlip_changed |= xfs_ail_delete_one(ailp, blip);
|
||||
else {
|
||||
xfs_clear_li_failed(blip);
|
||||
}
|
||||
}
|
||||
|
||||
if (mlip_changed) {
|
||||
|
@ -931,16 +931,15 @@ xfs_ioc_fsgetxattr(
|
||||
return 0;
|
||||
}
|
||||
|
||||
STATIC void
|
||||
xfs_set_diflags(
|
||||
STATIC uint16_t
|
||||
xfs_flags2diflags(
|
||||
struct xfs_inode *ip,
|
||||
unsigned int xflags)
|
||||
{
|
||||
unsigned int di_flags;
|
||||
uint64_t di_flags2;
|
||||
|
||||
/* can't set PREALLOC this way, just preserve it */
|
||||
di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
|
||||
uint16_t di_flags =
|
||||
(ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
|
||||
|
||||
if (xflags & FS_XFLAG_IMMUTABLE)
|
||||
di_flags |= XFS_DIFLAG_IMMUTABLE;
|
||||
if (xflags & FS_XFLAG_APPEND)
|
||||
@ -970,19 +969,24 @@ xfs_set_diflags(
|
||||
if (xflags & FS_XFLAG_EXTSIZE)
|
||||
di_flags |= XFS_DIFLAG_EXTSIZE;
|
||||
}
|
||||
ip->i_d.di_flags = di_flags;
|
||||
|
||||
/* diflags2 only valid for v3 inodes. */
|
||||
if (ip->i_d.di_version < 3)
|
||||
return;
|
||||
return di_flags;
|
||||
}
|
||||
|
||||
STATIC uint64_t
|
||||
xfs_flags2diflags2(
|
||||
struct xfs_inode *ip,
|
||||
unsigned int xflags)
|
||||
{
|
||||
uint64_t di_flags2 =
|
||||
(ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK);
|
||||
|
||||
di_flags2 = (ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK);
|
||||
if (xflags & FS_XFLAG_DAX)
|
||||
di_flags2 |= XFS_DIFLAG2_DAX;
|
||||
if (xflags & FS_XFLAG_COWEXTSIZE)
|
||||
di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
|
||||
|
||||
ip->i_d.di_flags2 = di_flags2;
|
||||
return di_flags2;
|
||||
}
|
||||
|
||||
STATIC void
|
||||
@ -1008,11 +1012,12 @@ xfs_diflags_to_linux(
|
||||
inode->i_flags |= S_NOATIME;
|
||||
else
|
||||
inode->i_flags &= ~S_NOATIME;
|
||||
#if 0 /* disabled until the flag switching races are sorted out */
|
||||
if (xflags & FS_XFLAG_DAX)
|
||||
inode->i_flags |= S_DAX;
|
||||
else
|
||||
inode->i_flags &= ~S_DAX;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
static int
|
||||
@ -1022,6 +1027,7 @@ xfs_ioctl_setattr_xflags(
|
||||
struct fsxattr *fa)
|
||||
{
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
uint64_t di_flags2;
|
||||
|
||||
/* Can't change realtime flag if any extents are allocated. */
|
||||
if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
|
||||
@ -1052,7 +1058,14 @@ xfs_ioctl_setattr_xflags(
|
||||
!capable(CAP_LINUX_IMMUTABLE))
|
||||
return -EPERM;
|
||||
|
||||
xfs_set_diflags(ip, fa->fsx_xflags);
|
||||
/* diflags2 only valid for v3 inodes. */
|
||||
di_flags2 = xfs_flags2diflags2(ip, fa->fsx_xflags);
|
||||
if (di_flags2 && ip->i_d.di_version < 3)
|
||||
return -EINVAL;
|
||||
|
||||
ip->i_d.di_flags = xfs_flags2diflags(ip, fa->fsx_xflags);
|
||||
ip->i_d.di_flags2 = di_flags2;
|
||||
|
||||
xfs_diflags_to_linux(ip);
|
||||
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
|
||||
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
|
||||
|
@ -274,7 +274,7 @@ xfs_iomap_write_direct(
|
||||
/*
|
||||
* Complete the transaction
|
||||
*/
|
||||
error = xfs_defer_finish(&tp, &dfops, NULL);
|
||||
error = xfs_defer_finish(&tp, &dfops);
|
||||
if (error)
|
||||
goto out_bmap_cancel;
|
||||
|
||||
@ -520,7 +520,6 @@ xfs_file_iomap_begin_delay(
|
||||
struct inode *inode,
|
||||
loff_t offset,
|
||||
loff_t count,
|
||||
unsigned flags,
|
||||
struct iomap *iomap)
|
||||
{
|
||||
struct xfs_inode *ip = XFS_I(inode);
|
||||
@ -784,7 +783,7 @@ xfs_iomap_write_allocate(
|
||||
if (error)
|
||||
goto trans_cancel;
|
||||
|
||||
error = xfs_defer_finish(&tp, &dfops, NULL);
|
||||
error = xfs_defer_finish(&tp, &dfops);
|
||||
if (error)
|
||||
goto trans_cancel;
|
||||
|
||||
@ -906,7 +905,7 @@ xfs_iomap_write_unwritten(
|
||||
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
|
||||
}
|
||||
|
||||
error = xfs_defer_finish(&tp, &dfops, NULL);
|
||||
error = xfs_defer_finish(&tp, &dfops);
|
||||
if (error)
|
||||
goto error_on_bmapi_transaction;
|
||||
|
||||
@ -984,8 +983,7 @@ xfs_file_iomap_begin(
|
||||
if (((flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE) &&
|
||||
!IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
|
||||
/* Reserve delalloc blocks for regular writeback. */
|
||||
return xfs_file_iomap_begin_delay(inode, offset, length, flags,
|
||||
iomap);
|
||||
return xfs_file_iomap_begin_delay(inode, offset, length, iomap);
|
||||
}
|
||||
|
||||
if (need_excl_ilock(ip, flags)) {
|
||||
|
@ -817,7 +817,7 @@ xfs_vn_setattr_nonsize(
|
||||
* Caution: The caller of this function is responsible for calling
|
||||
* setattr_prepare() or otherwise verifying the change is fine.
|
||||
*/
|
||||
int
|
||||
STATIC int
|
||||
xfs_setattr_size(
|
||||
struct xfs_inode *ip,
|
||||
struct iattr *iattr)
|
||||
|
@ -743,10 +743,14 @@ xfs_log_mount_finish(
|
||||
struct xfs_mount *mp)
|
||||
{
|
||||
int error = 0;
|
||||
bool readonly = (mp->m_flags & XFS_MOUNT_RDONLY);
|
||||
|
||||
if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
|
||||
ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
|
||||
return 0;
|
||||
} else if (readonly) {
|
||||
/* Allow unlinked processing to proceed */
|
||||
mp->m_flags &= ~XFS_MOUNT_RDONLY;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -757,12 +761,27 @@ xfs_log_mount_finish(
|
||||
* inodes. Turn it off immediately after recovery finishes
|
||||
* so that we don't leak the quota inodes if subsequent mount
|
||||
* activities fail.
|
||||
*
|
||||
* We let all inodes involved in redo item processing end up on
|
||||
* the LRU instead of being evicted immediately so that if we do
|
||||
* something to an unlinked inode, the irele won't cause
|
||||
* premature truncation and freeing of the inode, which results
|
||||
* in log recovery failure. We have to evict the unreferenced
|
||||
* lru inodes after clearing MS_ACTIVE because we don't
|
||||
* otherwise clean up the lru if there's a subsequent failure in
|
||||
* xfs_mountfs, which leads to us leaking the inodes if nothing
|
||||
* else (e.g. quotacheck) references the inodes before the
|
||||
* mount failure occurs.
|
||||
*/
|
||||
mp->m_super->s_flags |= MS_ACTIVE;
|
||||
error = xlog_recover_finish(mp->m_log);
|
||||
if (!error)
|
||||
xfs_log_work_queue(mp);
|
||||
mp->m_super->s_flags &= ~MS_ACTIVE;
|
||||
evict_inodes(mp->m_super);
|
||||
|
||||
if (readonly)
|
||||
mp->m_flags |= XFS_MOUNT_RDONLY;
|
||||
|
||||
return error;
|
||||
}
|
||||
@ -812,11 +831,14 @@ xfs_log_unmount_write(xfs_mount_t *mp)
|
||||
int error;
|
||||
|
||||
/*
|
||||
* Don't write out unmount record on read-only mounts.
|
||||
* Don't write out unmount record on norecovery mounts or ro devices.
|
||||
* Or, if we are doing a forced umount (typically because of IO errors).
|
||||
*/
|
||||
if (mp->m_flags & XFS_MOUNT_RDONLY)
|
||||
if (mp->m_flags & XFS_MOUNT_NORECOVERY ||
|
||||
xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) {
|
||||
ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
|
||||
return 0;
|
||||
}
|
||||
|
||||
error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
|
||||
ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log)));
|
||||
@ -3353,8 +3375,6 @@ maybe_sleep:
|
||||
*/
|
||||
if (iclog->ic_state & XLOG_STATE_IOERROR)
|
||||
return -EIO;
|
||||
if (log_flushed)
|
||||
*log_flushed = 1;
|
||||
} else {
|
||||
|
||||
no_sleep:
|
||||
@ -3458,8 +3478,6 @@ try_again:
|
||||
|
||||
xlog_wait(&iclog->ic_prev->ic_write_wait,
|
||||
&log->l_icloglock);
|
||||
if (log_flushed)
|
||||
*log_flushed = 1;
|
||||
already_slept = 1;
|
||||
goto try_again;
|
||||
}
|
||||
@ -3493,9 +3511,6 @@ try_again:
|
||||
*/
|
||||
if (iclog->ic_state & XLOG_STATE_IOERROR)
|
||||
return -EIO;
|
||||
|
||||
if (log_flushed)
|
||||
*log_flushed = 1;
|
||||
} else { /* just return */
|
||||
spin_unlock(&log->l_icloglock);
|
||||
}
|
||||
|
@ -1029,61 +1029,106 @@ out_error:
|
||||
}
|
||||
|
||||
/*
|
||||
* Check the log tail for torn writes. This is required when torn writes are
|
||||
* detected at the head and the head had to be walked back to a previous record.
|
||||
* The tail of the previous record must now be verified to ensure the torn
|
||||
* writes didn't corrupt the previous tail.
|
||||
* Calculate distance from head to tail (i.e., unused space in the log).
|
||||
*/
|
||||
static inline int
|
||||
xlog_tail_distance(
|
||||
struct xlog *log,
|
||||
xfs_daddr_t head_blk,
|
||||
xfs_daddr_t tail_blk)
|
||||
{
|
||||
if (head_blk < tail_blk)
|
||||
return tail_blk - head_blk;
|
||||
|
||||
return tail_blk + (log->l_logBBsize - head_blk);
|
||||
}
|
||||
|
||||
/*
|
||||
* Verify the log tail. This is particularly important when torn or incomplete
|
||||
* writes have been detected near the front of the log and the head has been
|
||||
* walked back accordingly.
|
||||
*
|
||||
* Return an error if CRC verification fails as recovery cannot proceed.
|
||||
* We also have to handle the case where the tail was pinned and the head
|
||||
* blocked behind the tail right before a crash. If the tail had been pushed
|
||||
* immediately prior to the crash and the subsequent checkpoint was only
|
||||
* partially written, it's possible it overwrote the last referenced tail in the
|
||||
* log with garbage. This is not a coherency problem because the tail must have
|
||||
* been pushed before it can be overwritten, but appears as log corruption to
|
||||
* recovery because we have no way to know the tail was updated if the
|
||||
* subsequent checkpoint didn't write successfully.
|
||||
*
|
||||
* Therefore, CRC check the log from tail to head. If a failure occurs and the
|
||||
* offending record is within max iclog bufs from the head, walk the tail
|
||||
* forward and retry until a valid tail is found or corruption is detected out
|
||||
* of the range of a possible overwrite.
|
||||
*/
|
||||
STATIC int
|
||||
xlog_verify_tail(
|
||||
struct xlog *log,
|
||||
xfs_daddr_t head_blk,
|
||||
xfs_daddr_t tail_blk)
|
||||
xfs_daddr_t *tail_blk,
|
||||
int hsize)
|
||||
{
|
||||
struct xlog_rec_header *thead;
|
||||
struct xfs_buf *bp;
|
||||
xfs_daddr_t first_bad;
|
||||
int count;
|
||||
int error = 0;
|
||||
bool wrapped;
|
||||
xfs_daddr_t tmp_head;
|
||||
xfs_daddr_t tmp_tail;
|
||||
xfs_daddr_t orig_tail = *tail_blk;
|
||||
|
||||
bp = xlog_get_bp(log, 1);
|
||||
if (!bp)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* Seek XLOG_MAX_ICLOGS + 1 records past the current tail record to get
|
||||
* a temporary head block that points after the last possible
|
||||
* concurrently written record of the tail.
|
||||
* Make sure the tail points to a record (returns positive count on
|
||||
* success).
|
||||
*/
|
||||
count = xlog_seek_logrec_hdr(log, head_blk, tail_blk,
|
||||
XLOG_MAX_ICLOGS + 1, bp, &tmp_head, &thead,
|
||||
&wrapped);
|
||||
if (count < 0) {
|
||||
error = count;
|
||||
error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, bp,
|
||||
&tmp_tail, &thead, &wrapped);
|
||||
if (error < 0)
|
||||
goto out;
|
||||
if (*tail_blk != tmp_tail)
|
||||
*tail_blk = tmp_tail;
|
||||
|
||||
/*
|
||||
* Run a CRC check from the tail to the head. We can't just check
|
||||
* MAX_ICLOGS records past the tail because the tail may point to stale
|
||||
* blocks cleared during the search for the head/tail. These blocks are
|
||||
* overwritten with zero-length records and thus record count is not a
|
||||
* reliable indicator of the iclog state before a crash.
|
||||
*/
|
||||
first_bad = 0;
|
||||
error = xlog_do_recovery_pass(log, head_blk, *tail_blk,
|
||||
XLOG_RECOVER_CRCPASS, &first_bad);
|
||||
while ((error == -EFSBADCRC || error == -EFSCORRUPTED) && first_bad) {
|
||||
int tail_distance;
|
||||
|
||||
/*
|
||||
* Is corruption within range of the head? If so, retry from
|
||||
* the next record. Otherwise return an error.
|
||||
*/
|
||||
tail_distance = xlog_tail_distance(log, head_blk, first_bad);
|
||||
if (tail_distance > BTOBB(XLOG_MAX_ICLOGS * hsize))
|
||||
break;
|
||||
|
||||
/* skip to the next record; returns positive count on success */
|
||||
error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2, bp,
|
||||
&tmp_tail, &thead, &wrapped);
|
||||
if (error < 0)
|
||||
goto out;
|
||||
|
||||
*tail_blk = tmp_tail;
|
||||
first_bad = 0;
|
||||
error = xlog_do_recovery_pass(log, head_blk, *tail_blk,
|
||||
XLOG_RECOVER_CRCPASS, &first_bad);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the call above didn't find XLOG_MAX_ICLOGS + 1 records, we ran
|
||||
* into the actual log head. tmp_head points to the start of the record
|
||||
* so update it to the actual head block.
|
||||
*/
|
||||
if (count < XLOG_MAX_ICLOGS + 1)
|
||||
tmp_head = head_blk;
|
||||
|
||||
/*
|
||||
* We now have a tail and temporary head block that covers at least
|
||||
* XLOG_MAX_ICLOGS records from the tail. We need to verify that these
|
||||
* records were completely written. Run a CRC verification pass from
|
||||
* tail to head and return the result.
|
||||
*/
|
||||
error = xlog_do_recovery_pass(log, tmp_head, tail_blk,
|
||||
XLOG_RECOVER_CRCPASS, &first_bad);
|
||||
|
||||
if (!error && *tail_blk != orig_tail)
|
||||
xfs_warn(log->l_mp,
|
||||
"Tail block (0x%llx) overwrite detected. Updated to 0x%llx",
|
||||
orig_tail, *tail_blk);
|
||||
out:
|
||||
xlog_put_bp(bp);
|
||||
return error;
|
||||
@ -1143,7 +1188,7 @@ xlog_verify_head(
|
||||
*/
|
||||
error = xlog_do_recovery_pass(log, *head_blk, tmp_rhead_blk,
|
||||
XLOG_RECOVER_CRCPASS, &first_bad);
|
||||
if (error == -EFSBADCRC) {
|
||||
if ((error == -EFSBADCRC || error == -EFSCORRUPTED) && first_bad) {
|
||||
/*
|
||||
* We've hit a potential torn write. Reset the error and warn
|
||||
* about it.
|
||||
@ -1183,31 +1228,12 @@ xlog_verify_head(
|
||||
ASSERT(0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now verify the tail based on the updated head. This is
|
||||
* required because the torn writes trimmed from the head could
|
||||
* have been written over the tail of a previous record. Return
|
||||
* any errors since recovery cannot proceed if the tail is
|
||||
* corrupt.
|
||||
*
|
||||
* XXX: This leaves a gap in truly robust protection from torn
|
||||
* writes in the log. If the head is behind the tail, the tail
|
||||
* pushes forward to create some space and then a crash occurs
|
||||
* causing the writes into the previous record's tail region to
|
||||
* tear, log recovery isn't able to recover.
|
||||
*
|
||||
* How likely is this to occur? If possible, can we do something
|
||||
* more intelligent here? Is it safe to push the tail forward if
|
||||
* we can determine that the tail is within the range of the
|
||||
* torn write (e.g., the kernel can only overwrite the tail if
|
||||
* it has actually been pushed forward)? Alternatively, could we
|
||||
* somehow prevent this condition at runtime?
|
||||
*/
|
||||
error = xlog_verify_tail(log, *head_blk, *tail_blk);
|
||||
}
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
return error;
|
||||
return xlog_verify_tail(log, *head_blk, tail_blk,
|
||||
be32_to_cpu((*rhead)->h_size));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -4801,12 +4827,16 @@ xlog_recover_process_intents(
|
||||
int error = 0;
|
||||
struct xfs_ail_cursor cur;
|
||||
struct xfs_ail *ailp;
|
||||
#if defined(DEBUG) || defined(XFS_WARN)
|
||||
xfs_lsn_t last_lsn;
|
||||
#endif
|
||||
|
||||
ailp = log->l_ailp;
|
||||
spin_lock(&ailp->xa_lock);
|
||||
lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
|
||||
#if defined(DEBUG) || defined(XFS_WARN)
|
||||
last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block);
|
||||
#endif
|
||||
while (lip != NULL) {
|
||||
/*
|
||||
* We're done when we see something other than an intent.
|
||||
@ -5218,7 +5248,7 @@ xlog_do_recovery_pass(
|
||||
xfs_daddr_t *first_bad) /* out: first bad log rec */
|
||||
{
|
||||
xlog_rec_header_t *rhead;
|
||||
xfs_daddr_t blk_no;
|
||||
xfs_daddr_t blk_no, rblk_no;
|
||||
xfs_daddr_t rhead_blk;
|
||||
char *offset;
|
||||
xfs_buf_t *hbp, *dbp;
|
||||
@ -5231,7 +5261,7 @@ xlog_do_recovery_pass(
|
||||
LIST_HEAD (buffer_list);
|
||||
|
||||
ASSERT(head_blk != tail_blk);
|
||||
rhead_blk = 0;
|
||||
blk_no = rhead_blk = tail_blk;
|
||||
|
||||
for (i = 0; i < XLOG_RHASH_SIZE; i++)
|
||||
INIT_HLIST_HEAD(&rhash[i]);
|
||||
@ -5309,7 +5339,6 @@ xlog_do_recovery_pass(
|
||||
}
|
||||
|
||||
memset(rhash, 0, sizeof(rhash));
|
||||
blk_no = rhead_blk = tail_blk;
|
||||
if (tail_blk > head_blk) {
|
||||
/*
|
||||
* Perform recovery around the end of the physical log.
|
||||
@ -5371,9 +5400,19 @@ xlog_do_recovery_pass(
|
||||
bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
|
||||
blk_no += hblks;
|
||||
|
||||
/* Read in data for log record */
|
||||
if (blk_no + bblks <= log->l_logBBsize) {
|
||||
error = xlog_bread(log, blk_no, bblks, dbp,
|
||||
/*
|
||||
* Read the log record data in multiple reads if it
|
||||
* wraps around the end of the log. Note that if the
|
||||
* header already wrapped, blk_no could point past the
|
||||
* end of the log. The record data is contiguous in
|
||||
* that case.
|
||||
*/
|
||||
if (blk_no + bblks <= log->l_logBBsize ||
|
||||
blk_no >= log->l_logBBsize) {
|
||||
/* mod blk_no in case the header wrapped and
|
||||
* pushed it beyond the end of the log */
|
||||
rblk_no = do_mod(blk_no, log->l_logBBsize);
|
||||
error = xlog_bread(log, rblk_no, bblks, dbp,
|
||||
&offset);
|
||||
if (error)
|
||||
goto bread_err2;
|
||||
@ -5563,6 +5602,8 @@ xlog_do_recover(
|
||||
xfs_buf_t *bp;
|
||||
xfs_sb_t *sbp;
|
||||
|
||||
trace_xfs_log_recover(log, head_blk, tail_blk);
|
||||
|
||||
/*
|
||||
* First replay the images in the log.
|
||||
*/
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include "xfs_error.h"
|
||||
#include "xfs_bmap.h"
|
||||
#include "xfs_bmap_btree.h"
|
||||
#include "xfs_bmap_util.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_trans_space.h"
|
||||
#include "xfs_qm.h"
|
||||
@ -1120,31 +1121,6 @@ xfs_qm_quotacheck_dqadjust(
|
||||
return 0;
|
||||
}
|
||||
|
||||
STATIC int
|
||||
xfs_qm_get_rtblks(
|
||||
xfs_inode_t *ip,
|
||||
xfs_qcnt_t *O_rtblks)
|
||||
{
|
||||
xfs_filblks_t rtblks; /* total rt blks */
|
||||
xfs_extnum_t idx; /* extent record index */
|
||||
xfs_ifork_t *ifp; /* inode fork pointer */
|
||||
xfs_extnum_t nextents; /* number of extent entries */
|
||||
int error;
|
||||
|
||||
ASSERT(XFS_IS_REALTIME_INODE(ip));
|
||||
ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
|
||||
if (!(ifp->if_flags & XFS_IFEXTENTS)) {
|
||||
if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
|
||||
return error;
|
||||
}
|
||||
rtblks = 0;
|
||||
nextents = xfs_iext_count(ifp);
|
||||
for (idx = 0; idx < nextents; idx++)
|
||||
rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
|
||||
*O_rtblks = (xfs_qcnt_t)rtblks;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* callback routine supplied to bulkstat(). Given an inumber, find its
|
||||
* dquots and update them to account for resources taken by that inode.
|
||||
@ -1160,7 +1136,8 @@ xfs_qm_dqusage_adjust(
|
||||
int *res) /* result code value */
|
||||
{
|
||||
xfs_inode_t *ip;
|
||||
xfs_qcnt_t nblks, rtblks = 0;
|
||||
xfs_qcnt_t nblks;
|
||||
xfs_filblks_t rtblks = 0; /* total rt blks */
|
||||
int error;
|
||||
|
||||
ASSERT(XFS_IS_QUOTA_RUNNING(mp));
|
||||
@ -1190,12 +1167,15 @@ xfs_qm_dqusage_adjust(
|
||||
ASSERT(ip->i_delayed_blks == 0);
|
||||
|
||||
if (XFS_IS_REALTIME_INODE(ip)) {
|
||||
/*
|
||||
* Walk thru the extent list and count the realtime blocks.
|
||||
*/
|
||||
error = xfs_qm_get_rtblks(ip, &rtblks);
|
||||
if (error)
|
||||
goto error0;
|
||||
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
|
||||
|
||||
if (!(ifp->if_flags & XFS_IFEXTENTS)) {
|
||||
error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
|
||||
if (error)
|
||||
goto error0;
|
||||
}
|
||||
|
||||
xfs_bmap_count_leaves(ifp, &rtblks);
|
||||
}
|
||||
|
||||
nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
|
||||
|
@ -525,7 +525,7 @@ xfs_cui_recover(
|
||||
}
|
||||
|
||||
xfs_refcount_finish_one_cleanup(tp, rcur, error);
|
||||
error = xfs_defer_finish(&tp, &dfops, NULL);
|
||||
error = xfs_defer_finish(&tp, &dfops);
|
||||
if (error)
|
||||
goto abort_defer;
|
||||
set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
|
||||
|
@ -464,7 +464,7 @@ retry:
|
||||
goto out_bmap_cancel;
|
||||
|
||||
/* Finish up. */
|
||||
error = xfs_defer_finish(&tp, &dfops, NULL);
|
||||
error = xfs_defer_finish(&tp, &dfops);
|
||||
if (error)
|
||||
goto out_bmap_cancel;
|
||||
|
||||
@ -602,7 +602,8 @@ xfs_reflink_cancel_cow_blocks(
|
||||
-(long)del.br_blockcount);
|
||||
|
||||
/* Roll the transaction */
|
||||
error = xfs_defer_finish(tpp, &dfops, ip);
|
||||
xfs_defer_ijoin(&dfops, ip);
|
||||
error = xfs_defer_finish(tpp, &dfops);
|
||||
if (error) {
|
||||
xfs_defer_cancel(&dfops);
|
||||
break;
|
||||
@ -791,7 +792,8 @@ xfs_reflink_end_cow(
|
||||
/* Remove the mapping from the CoW fork. */
|
||||
xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
|
||||
|
||||
error = xfs_defer_finish(&tp, &dfops, ip);
|
||||
xfs_defer_ijoin(&dfops, ip);
|
||||
error = xfs_defer_finish(&tp, &dfops);
|
||||
if (error)
|
||||
goto out_defer;
|
||||
next_extent:
|
||||
@ -1152,7 +1154,8 @@ xfs_reflink_remap_extent(
|
||||
|
||||
next_extent:
|
||||
/* Process all the deferred stuff. */
|
||||
error = xfs_defer_finish(&tp, &dfops, ip);
|
||||
xfs_defer_ijoin(&dfops, ip);
|
||||
error = xfs_defer_finish(&tp, &dfops);
|
||||
if (error)
|
||||
goto out_defer;
|
||||
}
|
||||
|
@ -810,7 +810,7 @@ xfs_growfs_rt_alloc(
|
||||
/*
|
||||
* Free any blocks freed up in the transaction, then commit.
|
||||
*/
|
||||
error = xfs_defer_finish(&tp, &dfops, NULL);
|
||||
error = xfs_defer_finish(&tp, &dfops);
|
||||
if (error)
|
||||
goto out_bmap_cancel;
|
||||
error = xfs_trans_commit(tp);
|
||||
|
@ -1220,7 +1220,7 @@ xfs_test_remount_options(
|
||||
tmp_mp->m_super = sb;
|
||||
error = xfs_parseargs(tmp_mp, options);
|
||||
xfs_free_fsname(tmp_mp);
|
||||
kfree(tmp_mp);
|
||||
kmem_free(tmp_mp);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
@ -378,7 +378,7 @@ xfs_symlink(
|
||||
xfs_trans_set_sync(tp);
|
||||
}
|
||||
|
||||
error = xfs_defer_finish(&tp, &dfops, NULL);
|
||||
error = xfs_defer_finish(&tp, &dfops);
|
||||
if (error)
|
||||
goto out_bmap_cancel;
|
||||
|
||||
@ -497,7 +497,8 @@ xfs_inactive_symlink_rmt(
|
||||
/*
|
||||
* Commit the first transaction. This logs the EFI and the inode.
|
||||
*/
|
||||
error = xfs_defer_finish(&tp, &dfops, ip);
|
||||
xfs_defer_ijoin(&dfops, ip);
|
||||
error = xfs_defer_finish(&tp, &dfops);
|
||||
if (error)
|
||||
goto error_bmap_cancel;
|
||||
/*
|
||||
|
@ -517,7 +517,6 @@ DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size);
|
||||
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_ordered);
|
||||
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale);
|
||||
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format);
|
||||
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_ordered);
|
||||
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale);
|
||||
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_ordered);
|
||||
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);
|
||||
@ -689,11 +688,34 @@ DEFINE_INODE_EVENT(xfs_inode_set_cowblocks_tag);
|
||||
DEFINE_INODE_EVENT(xfs_inode_clear_cowblocks_tag);
|
||||
DEFINE_INODE_EVENT(xfs_inode_free_cowblocks_invalid);
|
||||
|
||||
DEFINE_INODE_EVENT(xfs_filemap_fault);
|
||||
DEFINE_INODE_EVENT(xfs_filemap_huge_fault);
|
||||
DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite);
|
||||
DEFINE_INODE_EVENT(xfs_filemap_pfn_mkwrite);
|
||||
|
||||
TRACE_EVENT(xfs_filemap_fault,
|
||||
TP_PROTO(struct xfs_inode *ip, enum page_entry_size pe_size,
|
||||
bool write_fault),
|
||||
TP_ARGS(ip, pe_size, write_fault),
|
||||
TP_STRUCT__entry(
|
||||
__field(dev_t, dev)
|
||||
__field(xfs_ino_t, ino)
|
||||
__field(enum page_entry_size, pe_size)
|
||||
__field(bool, write_fault)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->dev = VFS_I(ip)->i_sb->s_dev;
|
||||
__entry->ino = ip->i_ino;
|
||||
__entry->pe_size = pe_size;
|
||||
__entry->write_fault = write_fault;
|
||||
),
|
||||
TP_printk("dev %d:%d ino 0x%llx %s write_fault %d",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
__entry->ino,
|
||||
__print_symbolic(__entry->pe_size,
|
||||
{ PE_SIZE_PTE, "PTE" },
|
||||
{ PE_SIZE_PMD, "PMD" },
|
||||
{ PE_SIZE_PUD, "PUD" }),
|
||||
__entry->write_fault)
|
||||
)
|
||||
|
||||
DECLARE_EVENT_CLASS(xfs_iref_class,
|
||||
TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
|
||||
TP_ARGS(ip, caller_ip),
|
||||
@ -1963,6 +1985,24 @@ DEFINE_EVENT(xfs_swap_extent_class, name, \
|
||||
DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before);
|
||||
DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after);
|
||||
|
||||
TRACE_EVENT(xfs_log_recover,
|
||||
TP_PROTO(struct xlog *log, xfs_daddr_t headblk, xfs_daddr_t tailblk),
|
||||
TP_ARGS(log, headblk, tailblk),
|
||||
TP_STRUCT__entry(
|
||||
__field(dev_t, dev)
|
||||
__field(xfs_daddr_t, headblk)
|
||||
__field(xfs_daddr_t, tailblk)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->dev = log->l_mp->m_super->s_dev;
|
||||
__entry->headblk = headblk;
|
||||
__entry->tailblk = tailblk;
|
||||
),
|
||||
TP_printk("dev %d:%d headblk 0x%llx tailblk 0x%llx",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->headblk,
|
||||
__entry->tailblk)
|
||||
)
|
||||
|
||||
TRACE_EVENT(xfs_log_recover_record,
|
||||
TP_PROTO(struct xlog *log, struct xlog_rec_header *rhead, int pass),
|
||||
TP_ARGS(log, rhead, pass),
|
||||
|
@ -1035,25 +1035,18 @@ xfs_trans_cancel(
|
||||
*/
|
||||
int
|
||||
xfs_trans_roll(
|
||||
struct xfs_trans **tpp,
|
||||
struct xfs_inode *dp)
|
||||
struct xfs_trans **tpp)
|
||||
{
|
||||
struct xfs_trans *trans;
|
||||
struct xfs_trans *trans = *tpp;
|
||||
struct xfs_trans_res tres;
|
||||
int error;
|
||||
|
||||
/*
|
||||
* Ensure that the inode is always logged.
|
||||
*/
|
||||
trans = *tpp;
|
||||
if (dp)
|
||||
xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
|
||||
|
||||
/*
|
||||
* Copy the critical parameters from one trans to the next.
|
||||
*/
|
||||
tres.tr_logres = trans->t_log_res;
|
||||
tres.tr_logcount = trans->t_log_count;
|
||||
|
||||
*tpp = xfs_trans_dup(trans);
|
||||
|
||||
/*
|
||||
@ -1067,10 +1060,8 @@ xfs_trans_roll(
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
trans = *tpp;
|
||||
|
||||
/*
|
||||
* Reserve space in the log for th next transaction.
|
||||
* Reserve space in the log for the next transaction.
|
||||
* This also pushes items in the "AIL", the list of logged items,
|
||||
* out to disk if they are taking up space at the tail of the log
|
||||
* that we want to use. This requires that either nothing be locked
|
||||
@ -1078,14 +1069,5 @@ xfs_trans_roll(
|
||||
* the prior and the next transactions.
|
||||
*/
|
||||
tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
|
||||
error = xfs_trans_reserve(trans, &tres, 0, 0);
|
||||
/*
|
||||
* Ensure that the inode is in the new transaction and locked.
|
||||
*/
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
if (dp)
|
||||
xfs_trans_ijoin(trans, dp, 0);
|
||||
return 0;
|
||||
return xfs_trans_reserve(*tpp, &tres, 0, 0);
|
||||
}
|
||||
|
@ -49,6 +49,7 @@ typedef struct xfs_log_item {
|
||||
struct xfs_ail *li_ailp; /* ptr to AIL */
|
||||
uint li_type; /* item type */
|
||||
uint li_flags; /* misc flags */
|
||||
struct xfs_buf *li_buf; /* real buffer pointer */
|
||||
struct xfs_log_item *li_bio_list; /* buffer item list */
|
||||
void (*li_cb)(struct xfs_buf *,
|
||||
struct xfs_log_item *);
|
||||
@ -64,11 +65,13 @@ typedef struct xfs_log_item {
|
||||
} xfs_log_item_t;
|
||||
|
||||
#define XFS_LI_IN_AIL 0x1
|
||||
#define XFS_LI_ABORTED 0x2
|
||||
#define XFS_LI_ABORTED 0x2
|
||||
#define XFS_LI_FAILED 0x4
|
||||
|
||||
#define XFS_LI_FLAGS \
|
||||
{ XFS_LI_IN_AIL, "IN_AIL" }, \
|
||||
{ XFS_LI_ABORTED, "ABORTED" }
|
||||
{ XFS_LI_ABORTED, "ABORTED" }, \
|
||||
{ XFS_LI_FAILED, "FAILED" }
|
||||
|
||||
struct xfs_item_ops {
|
||||
void (*iop_size)(xfs_log_item_t *, int *, int *);
|
||||
@ -79,6 +82,7 @@ struct xfs_item_ops {
|
||||
void (*iop_unlock)(xfs_log_item_t *);
|
||||
xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
|
||||
void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
|
||||
void (*iop_error)(xfs_log_item_t *, xfs_buf_t *);
|
||||
};
|
||||
|
||||
void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item,
|
||||
@ -208,12 +212,14 @@ void xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *);
|
||||
void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *);
|
||||
void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
|
||||
void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *);
|
||||
void xfs_trans_ordered_buf(xfs_trans_t *, struct xfs_buf *);
|
||||
bool xfs_trans_ordered_buf(xfs_trans_t *, struct xfs_buf *);
|
||||
void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
|
||||
void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
|
||||
void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int);
|
||||
void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *, uint);
|
||||
void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint);
|
||||
void xfs_trans_log_buf(struct xfs_trans *, struct xfs_buf *, uint,
|
||||
uint);
|
||||
void xfs_trans_dirty_buf(struct xfs_trans *, struct xfs_buf *);
|
||||
void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint);
|
||||
|
||||
void xfs_extent_free_init_defer_op(void);
|
||||
@ -224,7 +230,8 @@ int xfs_trans_free_extent(struct xfs_trans *,
|
||||
struct xfs_efd_log_item *, xfs_fsblock_t,
|
||||
xfs_extlen_t, struct xfs_owner_info *);
|
||||
int xfs_trans_commit(struct xfs_trans *);
|
||||
int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
|
||||
int xfs_trans_roll(struct xfs_trans **);
|
||||
int xfs_trans_roll_inode(struct xfs_trans **, struct xfs_inode *);
|
||||
void xfs_trans_cancel(xfs_trans_t *);
|
||||
int xfs_trans_ail_init(struct xfs_mount *);
|
||||
void xfs_trans_ail_destroy(struct xfs_mount *);
|
||||
|
@ -325,6 +325,21 @@ xfs_ail_delete(
|
||||
xfs_trans_ail_cursor_clear(ailp, lip);
|
||||
}
|
||||
|
||||
static inline uint
|
||||
xfsaild_push_item(
|
||||
struct xfs_ail *ailp,
|
||||
struct xfs_log_item *lip)
|
||||
{
|
||||
/*
|
||||
* If log item pinning is enabled, skip the push and track the item as
|
||||
* pinned. This can help induce head-behind-tail conditions.
|
||||
*/
|
||||
if (XFS_TEST_ERROR(false, ailp->xa_mount, XFS_ERRTAG_LOG_ITEM_PIN))
|
||||
return XFS_ITEM_PINNED;
|
||||
|
||||
return lip->li_ops->iop_push(lip, &ailp->xa_buf_list);
|
||||
}
|
||||
|
||||
static long
|
||||
xfsaild_push(
|
||||
struct xfs_ail *ailp)
|
||||
@ -382,7 +397,7 @@ xfsaild_push(
|
||||
* rely on the AIL cursor implementation to be able to deal with
|
||||
* the dropped lock.
|
||||
*/
|
||||
lock_result = lip->li_ops->iop_push(lip, &ailp->xa_buf_list);
|
||||
lock_result = xfsaild_push_item(ailp, lip);
|
||||
switch (lock_result) {
|
||||
case XFS_ITEM_SUCCESS:
|
||||
XFS_STATS_INC(mp, xs_push_ail_success);
|
||||
@ -687,12 +702,13 @@ xfs_trans_ail_update_bulk(
|
||||
bool
|
||||
xfs_ail_delete_one(
|
||||
struct xfs_ail *ailp,
|
||||
struct xfs_log_item *lip)
|
||||
struct xfs_log_item *lip)
|
||||
{
|
||||
struct xfs_log_item *mlip = xfs_ail_min(ailp);
|
||||
|
||||
trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn);
|
||||
xfs_ail_delete(ailp, lip);
|
||||
xfs_clear_li_failed(lip);
|
||||
lip->li_flags &= ~XFS_LI_IN_AIL;
|
||||
lip->li_lsn = 0;
|
||||
|
||||
|
@ -435,7 +435,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
|
||||
if (XFS_FORCED_SHUTDOWN(tp->t_mountp) && freed) {
|
||||
xfs_trans_ail_remove(&bip->bli_item, SHUTDOWN_LOG_IO_ERROR);
|
||||
xfs_buf_item_relse(bp);
|
||||
} else if (!xfs_buf_item_dirty(bip)) {
|
||||
} else if (!(bip->bli_flags & XFS_BLI_DIRTY)) {
|
||||
/***
|
||||
ASSERT(bp->b_pincount == 0);
|
||||
***/
|
||||
@ -493,25 +493,17 @@ xfs_trans_bhold_release(xfs_trans_t *tp,
|
||||
}
|
||||
|
||||
/*
|
||||
* This is called to mark bytes first through last inclusive of the given
|
||||
* buffer as needing to be logged when the transaction is committed.
|
||||
* The buffer must already be associated with the given transaction.
|
||||
*
|
||||
* First and last are numbers relative to the beginning of this buffer,
|
||||
* so the first byte in the buffer is numbered 0 regardless of the
|
||||
* value of b_blkno.
|
||||
* Mark a buffer dirty in the transaction.
|
||||
*/
|
||||
void
|
||||
xfs_trans_log_buf(xfs_trans_t *tp,
|
||||
xfs_buf_t *bp,
|
||||
uint first,
|
||||
uint last)
|
||||
xfs_trans_dirty_buf(
|
||||
struct xfs_trans *tp,
|
||||
struct xfs_buf *bp)
|
||||
{
|
||||
xfs_buf_log_item_t *bip = bp->b_fspriv;
|
||||
struct xfs_buf_log_item *bip = bp->b_fspriv;
|
||||
|
||||
ASSERT(bp->b_transp == tp);
|
||||
ASSERT(bip != NULL);
|
||||
ASSERT(first <= last && last < BBTOB(bp->b_length));
|
||||
ASSERT(bp->b_iodone == NULL ||
|
||||
bp->b_iodone == xfs_buf_iodone_callbacks);
|
||||
|
||||
@ -531,8 +523,6 @@ xfs_trans_log_buf(xfs_trans_t *tp,
|
||||
bp->b_iodone = xfs_buf_iodone_callbacks;
|
||||
bip->bli_item.li_cb = xfs_buf_iodone;
|
||||
|
||||
trace_xfs_trans_log_buf(bip);
|
||||
|
||||
/*
|
||||
* If we invalidated the buffer within this transaction, then
|
||||
* cancel the invalidation now that we're dirtying the buffer
|
||||
@ -545,17 +535,37 @@ xfs_trans_log_buf(xfs_trans_t *tp,
|
||||
bp->b_flags &= ~XBF_STALE;
|
||||
bip->__bli_format.blf_flags &= ~XFS_BLF_CANCEL;
|
||||
}
|
||||
bip->bli_flags |= XFS_BLI_DIRTY | XFS_BLI_LOGGED;
|
||||
|
||||
tp->t_flags |= XFS_TRANS_DIRTY;
|
||||
bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we have an ordered buffer we are not logging any dirty range but
|
||||
* it still needs to be marked dirty and that it has been logged.
|
||||
*/
|
||||
bip->bli_flags |= XFS_BLI_DIRTY | XFS_BLI_LOGGED;
|
||||
if (!(bip->bli_flags & XFS_BLI_ORDERED))
|
||||
xfs_buf_item_log(bip, first, last);
|
||||
/*
|
||||
* This is called to mark bytes first through last inclusive of the given
|
||||
* buffer as needing to be logged when the transaction is committed.
|
||||
* The buffer must already be associated with the given transaction.
|
||||
*
|
||||
* First and last are numbers relative to the beginning of this buffer,
|
||||
* so the first byte in the buffer is numbered 0 regardless of the
|
||||
* value of b_blkno.
|
||||
*/
|
||||
void
|
||||
xfs_trans_log_buf(
|
||||
struct xfs_trans *tp,
|
||||
struct xfs_buf *bp,
|
||||
uint first,
|
||||
uint last)
|
||||
{
|
||||
struct xfs_buf_log_item *bip = bp->b_fspriv;
|
||||
|
||||
ASSERT(first <= last && last < BBTOB(bp->b_length));
|
||||
ASSERT(!(bip->bli_flags & XFS_BLI_ORDERED));
|
||||
|
||||
xfs_trans_dirty_buf(tp, bp);
|
||||
|
||||
trace_xfs_trans_log_buf(bip);
|
||||
xfs_buf_item_log(bip, first, last);
|
||||
}
|
||||
|
||||
|
||||
@ -708,14 +718,13 @@ xfs_trans_inode_alloc_buf(
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark the buffer as ordered for this transaction. This means
|
||||
* that the contents of the buffer are not recorded in the transaction
|
||||
* but it is tracked in the AIL as though it was. This allows us
|
||||
* to record logical changes in transactions rather than the physical
|
||||
* changes we make to the buffer without changing writeback ordering
|
||||
* constraints of metadata buffers.
|
||||
* Mark the buffer as ordered for this transaction. This means that the contents
|
||||
* of the buffer are not recorded in the transaction but it is tracked in the
|
||||
* AIL as though it was. This allows us to record logical changes in
|
||||
* transactions rather than the physical changes we make to the buffer without
|
||||
* changing writeback ordering constraints of metadata buffers.
|
||||
*/
|
||||
void
|
||||
bool
|
||||
xfs_trans_ordered_buf(
|
||||
struct xfs_trans *tp,
|
||||
struct xfs_buf *bp)
|
||||
@ -726,8 +735,18 @@ xfs_trans_ordered_buf(
|
||||
ASSERT(bip != NULL);
|
||||
ASSERT(atomic_read(&bip->bli_refcount) > 0);
|
||||
|
||||
if (xfs_buf_item_dirty_format(bip))
|
||||
return false;
|
||||
|
||||
bip->bli_flags |= XFS_BLI_ORDERED;
|
||||
trace_xfs_buf_item_ordered(bip);
|
||||
|
||||
/*
|
||||
* We don't log a dirty range of an ordered buffer but it still needs
|
||||
* to be marked dirty and that it has been logged.
|
||||
*/
|
||||
xfs_trans_dirty_buf(tp, bp);
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -134,3 +134,17 @@ xfs_trans_log_inode(
|
||||
flags |= ip->i_itemp->ili_last_fields;
|
||||
ip->i_itemp->ili_fields |= flags;
|
||||
}
|
||||
|
||||
int
|
||||
xfs_trans_roll_inode(
|
||||
struct xfs_trans **tpp,
|
||||
struct xfs_inode *ip)
|
||||
{
|
||||
int error;
|
||||
|
||||
xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
|
||||
error = xfs_trans_roll(tpp);
|
||||
if (!error)
|
||||
xfs_trans_ijoin(*tpp, ip, 0);
|
||||
return error;
|
||||
}
|
||||
|
@ -164,4 +164,35 @@ xfs_trans_ail_copy_lsn(
|
||||
*dst = *src;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void
|
||||
xfs_clear_li_failed(
|
||||
struct xfs_log_item *lip)
|
||||
{
|
||||
struct xfs_buf *bp = lip->li_buf;
|
||||
|
||||
ASSERT(lip->li_flags & XFS_LI_IN_AIL);
|
||||
lockdep_assert_held(&lip->li_ailp->xa_lock);
|
||||
|
||||
if (lip->li_flags & XFS_LI_FAILED) {
|
||||
lip->li_flags &= ~XFS_LI_FAILED;
|
||||
lip->li_buf = NULL;
|
||||
xfs_buf_rele(bp);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
xfs_set_li_failed(
|
||||
struct xfs_log_item *lip,
|
||||
struct xfs_buf *bp)
|
||||
{
|
||||
lockdep_assert_held(&lip->li_ailp->xa_lock);
|
||||
|
||||
if (!(lip->li_flags & XFS_LI_FAILED)) {
|
||||
xfs_buf_hold(bp);
|
||||
lip->li_flags |= XFS_LI_FAILED;
|
||||
lip->li_buf = bp;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* __XFS_TRANS_PRIV_H__ */
|
||||
|
@ -2837,6 +2837,7 @@ static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { };
|
||||
#endif
|
||||
extern void unlock_new_inode(struct inode *);
|
||||
extern unsigned int get_next_ino(void);
|
||||
extern void evict_inodes(struct super_block *sb);
|
||||
|
||||
extern void __iget(struct inode * inode);
|
||||
extern void iget_failed(struct inode *);
|
||||
|
@ -96,7 +96,7 @@ fsmap_advance(
|
||||
#define FMR_OF_EXTENT_MAP 0x4 /* segment = extent map */
|
||||
#define FMR_OF_SHARED 0x8 /* segment = shared with another file */
|
||||
#define FMR_OF_SPECIAL_OWNER 0x10 /* owner is a special value */
|
||||
#define FMR_OF_LAST 0x20 /* segment is the last in the FS */
|
||||
#define FMR_OF_LAST 0x20 /* segment is the last in the dataset */
|
||||
|
||||
/* Each FS gets to define its own special owner codes. */
|
||||
#define FMR_OWNER(type, code) (((__u64)type << 32) | \
|
||||
|
Loading…
Reference in New Issue
Block a user