diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 24462bdfd8e7..6833110d1bd4 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5280,7 +5280,6 @@ __xfs_bunmapi( int whichfork; /* data or attribute fork */ xfs_fsblock_t sum; xfs_filblks_t len = *rlen; /* length to unmap in file */ - xfs_fileoff_t max_len; xfs_fileoff_t end; struct xfs_iext_cursor icur; bool done = false; @@ -5299,16 +5298,6 @@ __xfs_bunmapi( ASSERT(len > 0); ASSERT(nexts >= 0); - /* - * Guesstimate how many blocks we can unmap without running the risk of - * blowing out the transaction with a mix of EFIs and reflink - * adjustments. - */ - if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) - max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res)); - else - max_len = len; - error = xfs_iread_extents(tp, ip, whichfork); if (error) return error; @@ -5347,7 +5336,7 @@ __xfs_bunmapi( extno = 0; while (end != (xfs_fileoff_t)-1 && end >= start && - (nexts == 0 || extno < nexts) && max_len > 0) { + (nexts == 0 || extno < nexts)) { /* * Is the found extent after a hole in which end lives? * Just back up to the previous extent, if so. @@ -5381,14 +5370,6 @@ __xfs_bunmapi( if (del.br_startoff + del.br_blockcount > end + 1) del.br_blockcount = end + 1 - del.br_startoff; - /* How much can we safely unmap? */ - if (max_len < del.br_blockcount) { - del.br_startoff += del.br_blockcount - max_len; - if (!wasdel) - del.br_startblock += del.br_blockcount - max_len; - del.br_blockcount = max_len; - } - if (!isrt) goto delete; @@ -5524,7 +5505,6 @@ delete: if (error) goto error0; - max_len -= del.br_blockcount; end = del.br_startoff - 1; nodelete: /* diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c index 67798ff5e14e..9975b93a7412 100644 --- a/fs/xfs/libxfs/xfs_log_rlimit.c +++ b/fs/xfs/libxfs/xfs_log_rlimit.c @@ -14,6 +14,7 @@ #include "xfs_trans_space.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" +#include "xfs_trace.h" /* * Calculate the maximum length in bytes that would be required for a local @@ -36,6 +37,65 @@ xfs_log_calc_max_attrsetm_res( M_RES(mp)->tr_attrsetrt.tr_logres * nblks; } +/* + * Compute an alternate set of log reservation sizes for use exclusively with + * minimum log size calculations. + */ +static void +xfs_log_calc_trans_resv_for_minlogblocks( + struct xfs_mount *mp, + struct xfs_trans_resv *resv) +{ + unsigned int rmap_maxlevels = mp->m_rmap_maxlevels; + + /* + * In the early days of rmap+reflink, we always set the rmap maxlevels + * to 9 even if the AG was small enough that it would never grow to + * that height. Transaction reservation sizes influence the minimum + * log size calculation, which influences the size of the log that mkfs + * creates. Use the old value here to ensure that newly formatted + * small filesystems will mount on older kernels. + */ + if (xfs_has_rmapbt(mp) && xfs_has_reflink(mp)) + mp->m_rmap_maxlevels = XFS_OLD_REFLINK_RMAP_MAXLEVELS; + + xfs_trans_resv_calc(mp, resv); + + if (xfs_has_reflink(mp)) { + /* + * In the early days of reflink, typical log operation counts + * were greatly overestimated. + */ + resv->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT_REFLINK; + resv->tr_itruncate.tr_logcount = + XFS_ITRUNCATE_LOG_COUNT_REFLINK; + resv->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT_REFLINK; + } else if (xfs_has_rmapbt(mp)) { + /* + * In the early days of non-reflink rmap, the impact of rmapbt + * updates on log counts were not taken into account at all. + */ + resv->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT; + resv->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT; + resv->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT; + } + + /* + * In the early days of reflink, we did not use deferred refcount + * update log items, so log reservations must be recomputed using the + * old calculations. + */ + resv->tr_write.tr_logres = + xfs_calc_write_reservation_minlogsize(mp); + resv->tr_itruncate.tr_logres = + xfs_calc_itruncate_reservation_minlogsize(mp); + resv->tr_qm_dqalloc.tr_logres = + xfs_calc_qm_dqalloc_reservation_minlogsize(mp); + + /* Put everything back the way it was. This goes at the end. */ + mp->m_rmap_maxlevels = rmap_maxlevels; +} + /* * Iterate over the log space reservation table to figure out and return * the maximum one in terms of the pre-calculated values which were done @@ -46,19 +106,25 @@ xfs_log_get_max_trans_res( struct xfs_mount *mp, struct xfs_trans_res *max_resp) { + struct xfs_trans_resv resv = {}; struct xfs_trans_res *resp; struct xfs_trans_res *end_resp; + unsigned int i; int log_space = 0; int attr_space; attr_space = xfs_log_calc_max_attrsetm_res(mp); - resp = (struct xfs_trans_res *)M_RES(mp); - end_resp = (struct xfs_trans_res *)(M_RES(mp) + 1); - for (; resp < end_resp; resp++) { + xfs_log_calc_trans_resv_for_minlogblocks(mp, &resv); + + resp = (struct xfs_trans_res *)&resv; + end_resp = (struct xfs_trans_res *)(&resv + 1); + for (i = 0; resp < end_resp; i++, resp++) { int tmp = resp->tr_logcount > 1 ? resp->tr_logres * resp->tr_logcount : resp->tr_logres; + + trace_xfs_trans_resv_calc_minlogsize(mp, i, resp); if (log_space < tmp) { log_space = tmp; *max_resp = *resp; /* struct copy */ @@ -66,9 +132,10 @@ xfs_log_get_max_trans_res( } if (attr_space > log_space) { - *max_resp = M_RES(mp)->tr_attrsetm; /* struct copy */ + *max_resp = resv.tr_attrsetm; /* struct copy */ max_resp->tr_logres = attr_space; } + trace_xfs_log_get_max_trans_res(mp, max_resp); } /* diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 327ba25e9e17..97e9e6020596 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -886,8 +886,13 @@ xfs_refcount_still_have_space( { unsigned long overhead; - overhead = cur->bc_ag.refc.shape_changes * - xfs_allocfree_log_count(cur->bc_mp, 1); + /* + * Worst case estimate: full splits of the free space and rmap btrees + * to handle each of the shape changes to the refcount btree. + */ + overhead = xfs_allocfree_block_count(cur->bc_mp, + cur->bc_ag.refc.shape_changes); + overhead += cur->bc_mp->m_refc_maxlevels; overhead *= cur->bc_mp->m_sb.sb_blocksize; /* @@ -960,6 +965,7 @@ xfs_refcount_adjust_extents( * Either cover the hole (increment) or * delete the range (decrement). */ + cur->bc_ag.refc.nr_ops++; if (tmp.rc_refcount) { error = xfs_refcount_insert(cur, &tmp, &found_tmp); @@ -970,7 +976,6 @@ xfs_refcount_adjust_extents( error = -EFSCORRUPTED; goto out_error; } - cur->bc_ag.refc.nr_ops++; } else { fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno, @@ -1001,11 +1006,11 @@ xfs_refcount_adjust_extents( ext.rc_refcount += adj; trace_xfs_refcount_modify_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, &ext); + cur->bc_ag.refc.nr_ops++; if (ext.rc_refcount > 1) { error = xfs_refcount_update(cur, &ext); if (error) goto out_error; - cur->bc_ag.refc.nr_ops++; } else if (ext.rc_refcount == 1) { error = xfs_refcount_delete(cur, &found_rec); if (error) @@ -1014,7 +1019,6 @@ xfs_refcount_adjust_extents( error = -EFSCORRUPTED; goto out_error; } - cur->bc_ag.refc.nr_ops++; goto advloop; } else { fsbno = XFS_AGB_TO_FSB(cur->bc_mp, diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index 9eb01edbd89d..e8b322de7f3d 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -67,14 +67,17 @@ extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp, * log (plus any key updates) so we'll conservatively assume 32 bytes * per record. We must also leave space for btree splits on both ends * of the range and space for the CUD and a new CUI. + * + * Each EFI that we attach to the transaction is assumed to consume ~32 bytes. + * This is a low estimate for an EFI tracking a single extent (16 bytes for the + * EFI header, 16 for the extent, and 12 for the xlog op header), but the + * estimate is acceptable if there's more than one extent being freed. + * In the worst case of freeing every other block during a refcount decrease + * operation, we amortize the space used for one EFI log item across 16 + * extents. */ #define XFS_REFCOUNT_ITEM_OVERHEAD 32 -static inline xfs_fileoff_t xfs_refcount_max_unmap(int log_res) -{ - return (log_res * 3 / 4) / XFS_REFCOUNT_ITEM_OVERHEAD; -} - extern int xfs_refcount_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno, xfs_extlen_t len, bool *exists); union xfs_btree_rec; diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 8e1d09e8cc9a..e9913c2c5a24 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -56,15 +56,14 @@ xfs_calc_buf_res( * Per-extent log reservation for the btree changes involved in freeing or * allocating an extent. In classic XFS there were two trees that will be * modified (bnobt + cntbt). With rmap enabled, there are three trees - * (rmapbt). With reflink, there are four trees (refcountbt). The number of - * blocks reserved is based on the formula: + * (rmapbt). The number of blocks reserved is based on the formula: * * num trees * ((2 blocks/level * max depth) - 1) * * Keep in mind that max depth is calculated separately for each type of tree. */ uint -xfs_allocfree_log_count( +xfs_allocfree_block_count( struct xfs_mount *mp, uint num_ops) { @@ -73,12 +72,23 @@ xfs_allocfree_log_count( blocks = num_ops * 2 * (2 * mp->m_alloc_maxlevels - 1); if (xfs_has_rmapbt(mp)) blocks += num_ops * (2 * mp->m_rmap_maxlevels - 1); - if (xfs_has_reflink(mp)) - blocks += num_ops * (2 * mp->m_refc_maxlevels - 1); return blocks; } +/* + * Per-extent log reservation for refcount btree changes. These are never done + * in the same transaction as an allocation or a free, so we compute them + * separately. + */ +static unsigned int +xfs_refcountbt_block_count( + struct xfs_mount *mp, + unsigned int num_ops) +{ + return num_ops * (2 * mp->m_refc_maxlevels - 1); +} + /* * Logging inodes is really tricksy. They are logged in memory format, * which means that what we write into the log doesn't directly translate into @@ -136,7 +146,7 @@ xfs_calc_inobt_res( { return xfs_calc_buf_res(M_IGEO(mp)->inobt_maxlevels, XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), + xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1), XFS_FSB_TO_B(mp, 1)); } @@ -183,7 +193,7 @@ xfs_calc_inode_chunk_res( { uint res, size = 0; - res = xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), + res = xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1), XFS_FSB_TO_B(mp, 1)); if (alloc) { /* icreate tx uses ordered buffers */ @@ -203,7 +213,7 @@ xfs_calc_inode_chunk_res( * extents, as well as the realtime summary block. */ static unsigned int -xfs_rtalloc_log_count( +xfs_rtalloc_block_count( struct xfs_mount *mp, unsigned int num_ops) { @@ -233,6 +243,28 @@ xfs_rtalloc_log_count( * register overflow from temporaries in the calculations. */ +/* + * Compute the log reservation required to handle the refcount update + * transaction. Refcount updates are always done via deferred log items. + * + * This is calculated as: + * Data device refcount updates (t1): + * the agfs of the ags containing the blocks: nr_ops * sector size + * the refcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size + */ +static unsigned int +xfs_calc_refcountbt_reservation( + struct xfs_mount *mp, + unsigned int nr_ops) +{ + unsigned int blksz = XFS_FSB_TO_B(mp, 1); + + if (!xfs_has_reflink(mp)) + return 0; + + return xfs_calc_buf_res(nr_ops, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(xfs_refcountbt_block_count(mp, nr_ops), blksz); +} /* * In a write transaction we can allocate a maximum of 2 @@ -255,34 +287,65 @@ xfs_rtalloc_log_count( * the agfls of the ags containing the blocks: 2 * sector size * the super block free block counter: sector size * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size + * And any refcount updates that happen in a separate transaction (t4). */ STATIC uint xfs_calc_write_reservation( - struct xfs_mount *mp) + struct xfs_mount *mp, + bool for_minlogsize) { - unsigned int t1, t2, t3; + unsigned int t1, t2, t3, t4; unsigned int blksz = XFS_FSB_TO_B(mp, 1); t1 = xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), blksz) + xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz); + xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz); if (xfs_has_realtime(mp)) { t2 = xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), blksz) + xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 1), blksz) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), blksz); + xfs_calc_buf_res(xfs_rtalloc_block_count(mp, 1), blksz) + + xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1), blksz); } else { t2 = 0; } t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz); + xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz); - return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3); + /* + * In the early days of reflink, we included enough reservation to log + * two refcountbt splits for each transaction. The codebase runs + * refcountbt updates in separate transactions now, so to compute the + * minimum log size, add the refcountbtree splits back to t1 and t3 and + * do not account them separately as t4. Reflink did not support + * realtime when the reservations were established, so no adjustment to + * t2 is needed. + */ + if (for_minlogsize) { + unsigned int adj = 0; + + if (xfs_has_reflink(mp)) + adj = xfs_calc_buf_res( + xfs_refcountbt_block_count(mp, 2), + blksz); + t1 += adj; + t3 += adj; + return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3); + } + + t4 = xfs_calc_refcountbt_reservation(mp, 1); + return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3)); +} + +unsigned int +xfs_calc_write_reservation_minlogsize( + struct xfs_mount *mp) +{ + return xfs_calc_write_reservation(mp, true); } /* @@ -304,29 +367,57 @@ xfs_calc_write_reservation( * the realtime summary: 2 exts * 1 block * worst case split in allocation btrees per extent assuming 2 extents: * 2 exts * 2 trees * (2 * max depth - 1) * block size + * And any refcount updates that happen in a separate transaction (t4). */ STATIC uint xfs_calc_itruncate_reservation( - struct xfs_mount *mp) + struct xfs_mount *mp, + bool for_minlogsize) { - unsigned int t1, t2, t3; + unsigned int t1, t2, t3, t4; unsigned int blksz = XFS_FSB_TO_B(mp, 1); t1 = xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz); t2 = xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), blksz); + xfs_calc_buf_res(xfs_allocfree_block_count(mp, 4), blksz); if (xfs_has_realtime(mp)) { t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 2), blksz) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz); + xfs_calc_buf_res(xfs_rtalloc_block_count(mp, 2), blksz) + + xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz); } else { t3 = 0; } - return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3); + /* + * In the early days of reflink, we included enough reservation to log + * four refcountbt splits in the same transaction as bnobt/cntbt + * updates. The codebase runs refcountbt updates in separate + * transactions now, so to compute the minimum log size, add the + * refcount btree splits back here and do not compute them separately + * as t4. Reflink did not support realtime when the reservations were + * established, so do not adjust t3. + */ + if (for_minlogsize) { + if (xfs_has_reflink(mp)) + t2 += xfs_calc_buf_res( + xfs_refcountbt_block_count(mp, 4), + blksz); + + return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3); + } + + t4 = xfs_calc_refcountbt_reservation(mp, 2); + return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3)); +} + +unsigned int +xfs_calc_itruncate_reservation_minlogsize( + struct xfs_mount *mp) +{ + return xfs_calc_itruncate_reservation(mp, true); } /* @@ -350,7 +441,7 @@ xfs_calc_rename_reservation( xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1))), (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 3), + xfs_calc_buf_res(xfs_allocfree_block_count(mp, 3), XFS_FSB_TO_B(mp, 1)))); } @@ -390,7 +481,7 @@ xfs_calc_link_reservation( xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1))), (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), + xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1), XFS_FSB_TO_B(mp, 1)))); } @@ -428,7 +519,7 @@ xfs_calc_remove_reservation( xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1))), (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), + xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), XFS_FSB_TO_B(mp, 1)))); } @@ -573,7 +664,7 @@ xfs_calc_growdata_reservation( struct xfs_mount *mp) { return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), + xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1), XFS_FSB_TO_B(mp, 1)); } @@ -595,7 +686,7 @@ xfs_calc_growrtalloc_reservation( xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), XFS_FSB_TO_B(mp, 1)) + xfs_calc_inode_res(mp, 1) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), + xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1), XFS_FSB_TO_B(mp, 1)); } @@ -671,7 +762,7 @@ xfs_calc_addafork_reservation( xfs_calc_buf_res(1, mp->m_dir_geo->blksize) + xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1, XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), + xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1), XFS_FSB_TO_B(mp, 1)); } @@ -694,7 +785,7 @@ xfs_calc_attrinval_reservation( xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK), XFS_FSB_TO_B(mp, 1))), (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), + xfs_calc_buf_res(xfs_allocfree_block_count(mp, 4), XFS_FSB_TO_B(mp, 1)))); } @@ -761,7 +852,7 @@ xfs_calc_attrrm_reservation( XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)), (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), + xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), XFS_FSB_TO_B(mp, 1)))); } @@ -792,13 +883,21 @@ xfs_calc_qm_setqlim_reservation(void) */ STATIC uint xfs_calc_qm_dqalloc_reservation( - struct xfs_mount *mp) + struct xfs_mount *mp, + bool for_minlogsize) { - return xfs_calc_write_reservation(mp) + + return xfs_calc_write_reservation(mp, for_minlogsize) + xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1); } +unsigned int +xfs_calc_qm_dqalloc_reservation_minlogsize( + struct xfs_mount *mp) +{ + return xfs_calc_qm_dqalloc_reservation(mp, true); +} + /* * Syncing the incore super block changes to disk. * the super block to reflect the changes: sector size @@ -815,36 +914,18 @@ xfs_trans_resv_calc( struct xfs_mount *mp, struct xfs_trans_resv *resp) { - unsigned int rmap_maxlevels = mp->m_rmap_maxlevels; - - /* - * In the early days of rmap+reflink, we always set the rmap maxlevels - * to 9 even if the AG was small enough that it would never grow to - * that height. Transaction reservation sizes influence the minimum - * log size calculation, which influences the size of the log that mkfs - * creates. Use the old value here to ensure that newly formatted - * small filesystems will mount on older kernels. - */ - if (xfs_has_rmapbt(mp) && xfs_has_reflink(mp)) - mp->m_rmap_maxlevels = XFS_OLD_REFLINK_RMAP_MAXLEVELS; + int logcount_adj = 0; /* * The following transactions are logged in physical format and * require a permanent reservation on space. */ - resp->tr_write.tr_logres = xfs_calc_write_reservation(mp); - if (xfs_has_reflink(mp)) - resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT_REFLINK; - else - resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT; + resp->tr_write.tr_logres = xfs_calc_write_reservation(mp, false); + resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT; resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp); - if (xfs_has_reflink(mp)) - resp->tr_itruncate.tr_logcount = - XFS_ITRUNCATE_LOG_COUNT_REFLINK; - else - resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT; + resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp, false); + resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT; resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES; resp->tr_rename.tr_logres = xfs_calc_rename_reservation(mp); @@ -900,11 +981,9 @@ xfs_trans_resv_calc( resp->tr_growrtalloc.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT; resp->tr_growrtalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp); - if (xfs_has_reflink(mp)) - resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT_REFLINK; - else - resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT; + resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp, + false); + resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT; resp->tr_qm_dqalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES; /* @@ -931,6 +1010,19 @@ xfs_trans_resv_calc( resp->tr_growrtzero.tr_logres = xfs_calc_growrtzero_reservation(mp); resp->tr_growrtfree.tr_logres = xfs_calc_growrtfree_reservation(mp); - /* Put everything back the way it was. This goes at the end. */ - mp->m_rmap_maxlevels = rmap_maxlevels; + /* + * Add one logcount for BUI items that appear with rmap or reflink, + * one logcount for refcount intent items, and one logcount for rmap + * intent items. + */ + if (xfs_has_reflink(mp) || xfs_has_rmapbt(mp)) + logcount_adj++; + if (xfs_has_reflink(mp)) + logcount_adj++; + if (xfs_has_rmapbt(mp)) + logcount_adj++; + + resp->tr_itruncate.tr_logcount += logcount_adj; + resp->tr_write.tr_logcount += logcount_adj; + resp->tr_qm_dqalloc.tr_logcount += logcount_adj; } diff --git a/fs/xfs/libxfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h index fc4e9b369a3a..0554b9d775d2 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.h +++ b/fs/xfs/libxfs/xfs_trans_resv.h @@ -73,7 +73,6 @@ struct xfs_trans_resv { #define XFS_DEFAULT_LOG_COUNT 1 #define XFS_DEFAULT_PERM_LOG_COUNT 2 #define XFS_ITRUNCATE_LOG_COUNT 2 -#define XFS_ITRUNCATE_LOG_COUNT_REFLINK 8 #define XFS_INACTIVE_LOG_COUNT 2 #define XFS_CREATE_LOG_COUNT 2 #define XFS_CREATE_TMPFILE_LOG_COUNT 2 @@ -83,13 +82,24 @@ struct xfs_trans_resv { #define XFS_LINK_LOG_COUNT 2 #define XFS_RENAME_LOG_COUNT 2 #define XFS_WRITE_LOG_COUNT 2 -#define XFS_WRITE_LOG_COUNT_REFLINK 8 #define XFS_ADDAFORK_LOG_COUNT 2 #define XFS_ATTRINVAL_LOG_COUNT 1 #define XFS_ATTRSET_LOG_COUNT 3 #define XFS_ATTRRM_LOG_COUNT 3 +/* + * Original log operation counts were overestimated in the early days of + * reflink. These are retained here purely for minimum log size calculations + * and must not be used for runtime reservations. + */ +#define XFS_ITRUNCATE_LOG_COUNT_REFLINK 8 +#define XFS_WRITE_LOG_COUNT_REFLINK 8 + void xfs_trans_resv_calc(struct xfs_mount *mp, struct xfs_trans_resv *resp); -uint xfs_allocfree_log_count(struct xfs_mount *mp, uint num_ops); +uint xfs_allocfree_block_count(struct xfs_mount *mp, uint num_ops); + +unsigned int xfs_calc_itruncate_reservation_minlogsize(struct xfs_mount *mp); +unsigned int xfs_calc_write_reservation_minlogsize(struct xfs_mount *mp); +unsigned int xfs_calc_qm_dqalloc_reservation_minlogsize(struct xfs_mount *mp); #endif /* __XFS_TRANS_RESV_H__ */ diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 1ae6d3434ad2..e7a7c00d93be 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -586,21 +586,21 @@ out: STATIC int xfs_reflink_end_cow_extent( struct xfs_inode *ip, - xfs_fileoff_t offset_fsb, - xfs_fileoff_t *end_fsb) + xfs_fileoff_t *offset_fsb, + xfs_fileoff_t end_fsb) { - struct xfs_bmbt_irec got, del; struct xfs_iext_cursor icur; + struct xfs_bmbt_irec got, del, data; struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); - xfs_filblks_t rlen; unsigned int resblks; + int nmaps; int error; /* No COW extents? That's easy! */ if (ifp->if_bytes == 0) { - *end_fsb = offset_fsb; + *offset_fsb = end_fsb; return 0; } @@ -631,42 +631,66 @@ xfs_reflink_end_cow_extent( * left by the time I/O completes for the loser of the race. In that * case we are done. */ - if (!xfs_iext_lookup_extent_before(ip, ifp, end_fsb, &icur, &got) || - got.br_startoff + got.br_blockcount <= offset_fsb) { - *end_fsb = offset_fsb; + if (!xfs_iext_lookup_extent(ip, ifp, *offset_fsb, &icur, &got) || + got.br_startoff >= end_fsb) { + *offset_fsb = end_fsb; goto out_cancel; } - /* - * Structure copy @got into @del, then trim @del to the range that we - * were asked to remap. We preserve @got for the eventual CoW fork - * deletion; from now on @del represents the mapping that we're - * actually remapping. - */ - del = got; - xfs_trim_extent(&del, offset_fsb, *end_fsb - offset_fsb); - - ASSERT(del.br_blockcount > 0); - /* * Only remap real extents that contain data. With AIO, speculative * preallocations can leak into the range we are called upon, and we - * need to skip them. + * need to skip them. Preserve @got for the eventual CoW fork + * deletion; from now on @del represents the mapping that we're + * actually remapping. */ - if (!xfs_bmap_is_written_extent(&got)) { - *end_fsb = del.br_startoff; - goto out_cancel; + while (!xfs_bmap_is_written_extent(&got)) { + if (!xfs_iext_next_extent(ifp, &icur, &got) || + got.br_startoff >= end_fsb) { + *offset_fsb = end_fsb; + goto out_cancel; + } } + del = got; - /* Unmap the old blocks in the data fork. */ - rlen = del.br_blockcount; - error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1); + /* Grab the corresponding mapping in the data fork. */ + nmaps = 1; + error = xfs_bmapi_read(ip, del.br_startoff, del.br_blockcount, &data, + &nmaps, 0); if (error) goto out_cancel; - /* Trim the extent to whatever got unmapped. */ - xfs_trim_extent(&del, del.br_startoff + rlen, del.br_blockcount - rlen); - trace_xfs_reflink_cow_remap(ip, &del); + /* We can only remap the smaller of the two extent sizes. */ + data.br_blockcount = min(data.br_blockcount, del.br_blockcount); + del.br_blockcount = data.br_blockcount; + + trace_xfs_reflink_cow_remap_from(ip, &del); + trace_xfs_reflink_cow_remap_to(ip, &data); + + if (xfs_bmap_is_real_extent(&data)) { + /* + * If the extent we're remapping is backed by storage (written + * or not), unmap the extent and drop its refcount. + */ + xfs_bmap_unmap_extent(tp, ip, &data); + xfs_refcount_decrease_extent(tp, &data); + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, + -data.br_blockcount); + } else if (data.br_startblock == DELAYSTARTBLOCK) { + int done; + + /* + * If the extent we're remapping is a delalloc reservation, + * we can use the regular bunmapi function to release the + * incore state. Dropping the delalloc reservation takes care + * of the quota reservation for us. + */ + error = xfs_bunmapi(NULL, ip, data.br_startoff, + data.br_blockcount, 0, 1, &done); + if (error) + goto out_cancel; + ASSERT(done); + } /* Free the CoW orphan record. */ xfs_refcount_free_cow_extent(tp, del.br_startblock, del.br_blockcount); @@ -687,7 +711,7 @@ xfs_reflink_end_cow_extent( return error; /* Update the caller about how much progress we made. */ - *end_fsb = del.br_startoff; + *offset_fsb = del.br_startoff + del.br_blockcount; return 0; out_cancel: @@ -715,7 +739,7 @@ xfs_reflink_end_cow( end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count); /* - * Walk backwards until we're out of the I/O range. The loop function + * Walk forwards until we've remapped the I/O range. The loop function * repeatedly cycles the ILOCK to allocate one transaction per remapped * extent. * @@ -747,7 +771,7 @@ xfs_reflink_end_cow( * blocks will be remapped. */ while (end_fsb > offset_fsb && !error) - error = xfs_reflink_end_cow_extent(ip, offset_fsb, &end_fsb); + error = xfs_reflink_end_cow_extent(ip, &offset_fsb, end_fsb); if (error) trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_); @@ -1138,7 +1162,7 @@ xfs_reflink_remap_extent( xfs_refcount_decrease_extent(tp, &smap); qdelta -= smap.br_blockcount; } else if (smap.br_startblock == DELAYSTARTBLOCK) { - xfs_filblks_t len = smap.br_blockcount; + int done; /* * If the extent we're unmapping is a delalloc reservation, @@ -1146,10 +1170,11 @@ xfs_reflink_remap_extent( * incore state. Dropping the delalloc reservation takes care * of the quota reservation for us. */ - error = __xfs_bunmapi(NULL, ip, smap.br_startoff, &len, 0, 1); + error = xfs_bunmapi(NULL, ip, smap.br_startoff, + smap.br_blockcount, 0, 1, &done); if (error) goto out_cancel; - ASSERT(len == 0); + ASSERT(done); } /* diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index fe8f4ec98ec6..e19a3f7351be 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -3408,7 +3408,8 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow); DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range); DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow); -DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap); +DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap_from); +DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap_to); DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error); DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error); @@ -3503,7 +3504,7 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_low_key); DEFINE_GETFSMAP_EVENT(xfs_getfsmap_high_key); DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping); -TRACE_EVENT(xfs_trans_resv_calc, +DECLARE_EVENT_CLASS(xfs_trans_resv_class, TP_PROTO(struct xfs_mount *mp, unsigned int type, struct xfs_trans_res *res), TP_ARGS(mp, type, res), @@ -3527,6 +3528,33 @@ TRACE_EVENT(xfs_trans_resv_calc, __entry->logres, __entry->logcount, __entry->logflags) +) + +#define DEFINE_TRANS_RESV_EVENT(name) \ +DEFINE_EVENT(xfs_trans_resv_class, name, \ + TP_PROTO(struct xfs_mount *mp, unsigned int type, \ + struct xfs_trans_res *res), \ + TP_ARGS(mp, type, res)) +DEFINE_TRANS_RESV_EVENT(xfs_trans_resv_calc); +DEFINE_TRANS_RESV_EVENT(xfs_trans_resv_calc_minlogsize); + +TRACE_EVENT(xfs_log_get_max_trans_res, + TP_PROTO(struct xfs_mount *mp, const struct xfs_trans_res *res), + TP_ARGS(mp, res), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(uint, logres) + __field(int, logcount) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->logres = res->tr_logres; + __entry->logcount = res->tr_logcount; + ), + TP_printk("dev %d:%d logres %u logcount %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->logres, + __entry->logcount) ); DECLARE_EVENT_CLASS(xfs_trans_class, diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 836ce2beac53..82cf0189c0db 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -32,7 +32,6 @@ static void xfs_trans_trace_reservations( struct xfs_mount *mp) { - struct xfs_trans_res resv; struct xfs_trans_res *res; struct xfs_trans_res *end_res; int i; @@ -41,8 +40,6 @@ xfs_trans_trace_reservations( end_res = (struct xfs_trans_res *)(M_RES(mp) + 1); for (i = 0; res < end_res; i++, res++) trace_xfs_trans_resv_calc(mp, i, res); - xfs_log_get_max_trans_res(mp, &resv); - trace_xfs_trans_resv_calc(mp, -1, &resv); } #else # define xfs_trans_trace_reservations(mp)