Fixes for 5.5:

- Minor documentation fixes
 - Fix a file corruption due to read racing with an insert range
 operation.
 - Fix log reservation overflows when allocating large rt extents
 - Fix a buffer log item flags check
 - Don't allow administrators to mount with sunit= options that will
 cause later xfs_repair complaints about the root directory being
 suspicious because the fs geometry appeared inconsistent
 - Fix a non-static helper that should have been static
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEUzaAxoMeQq6m2jMV+H93GTRKtOsFAl3890IACgkQ+H93GTRK
 tOuNkw//e7lT3Xys+dd60Regn+EkYkuI6myN4TsRNYHI7fQ0C9FqkGyYltJmEvqr
 QhUvQD1BU9Czb5Ghba+DpYz2dpqLYrbVTdmK4jNGjt9K0xNEU7zL297/PyE5Y54t
 il5nAxZVZ9x0aadKS0yhIt+Q3+dN29O2ablcRcErPi6H5EM3csjmPnrHKD+irG5j
 MhY5NNWvU1//qU4w2q8ikRKGhMrDYLWo57iJoIX2y17Sw+HXrDsEGoavOpyaoy0v
 T5m4OfBxU9FD8UhqI86Pua9HG8AlZK+IPT9pZjYGYWT8mkuTppSWjSHJU6HBGqF2
 fNrfMpQK/2H5KrqBTVvAzbhYcby8L1tZXUg+4w5iJuvAlHqb/IuBd+Y+nbSbduL/
 O9k3Ao0PL6Yt78knNf2F1943ioAI0zbhjDhmKtX17qfAojWQz6CAJmP5OWPWPprh
 FHA9WT0OzArXF77E+srfYyChclQzllBTOmYNKU//sXgKnqe33fgRIN6Il3T6V3w1
 5ifI/0N+FV2Z+yRqE0gSaqLNdPATMNzuGorQsv7P+TRPtD70aB8dhRzcVzqzfbTm
 C7owl3FGQFTCS/PIwPTRsLfqt3vt9mvc7pUMZOIu7uP63T2daPZ2amTbav/poXgb
 5Zih0pknWS8iQM4bwaPMLEr51Wp3Yo8gDPuW1jKJ13FCOuXU70E=
 =JXs9
 -----END PGP SIGNATURE-----

Merge tag 'xfs-5.5-fixes-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Darrick Wong:
 "Fix a few bugs that could lead to corrupt files, fsck complaints, and
  filesystem crashes:

   - Minor documentation fixes

   - Fix a file corruption due to read racing with an insert range
     operation.

   - Fix log reservation overflows when allocating large rt extents

   - Fix a buffer log item flags check

   - Don't allow administrators to mount with sunit= options that will
     cause later xfs_repair complaints about the root directory being
     suspicious because the fs geometry appeared inconsistent

   - Fix a non-static helper that should have been static"

* tag 'xfs-5.5-fixes-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: Make the symbol 'xfs_rtalloc_log_count' static
  xfs: don't commit sunit/swidth updates to disk if that would cause repair failures
  xfs: split the sunit parameter update into two parts
  xfs: refactor agfl length computation function
  libxfs: resync with the userspace libxfs
  xfs: use bitops interface for buf log item AIL flag check
  xfs: fix log reservation overflows when allocating large rt extents
  xfs: stabilize insert range start boundary to avoid COW writeback race
  xfs: fix Sphinx documentation warning
This commit is contained in:
Linus Torvalds 2019-12-22 10:59:06 -08:00
commit c601747175
13 changed files with 347 additions and 110 deletions

View File

@ -253,7 +253,7 @@ The following sysctls are available for the XFS filesystem:
pool. pool.
fs.xfs.speculative_prealloc_lifetime fs.xfs.speculative_prealloc_lifetime
(Units: seconds Min: 1 Default: 300 Max: 86400) (Units: seconds Min: 1 Default: 300 Max: 86400)
The interval at which the background scanning for inodes The interval at which the background scanning for inodes
with unused speculative preallocation runs. The scan with unused speculative preallocation runs. The scan
removes unused preallocation from clean inodes and releases removes unused preallocation from clean inodes and releases

View File

@ -2248,24 +2248,32 @@ xfs_alloc_longest_free_extent(
return pag->pagf_flcount > 0 || pag->pagf_longest > 0; return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
} }
/*
* Compute the minimum length of the AGFL in the given AG. If @pag is NULL,
* return the largest possible minimum length.
*/
unsigned int unsigned int
xfs_alloc_min_freelist( xfs_alloc_min_freelist(
struct xfs_mount *mp, struct xfs_mount *mp,
struct xfs_perag *pag) struct xfs_perag *pag)
{ {
/* AG btrees have at least 1 level. */
static const uint8_t fake_levels[XFS_BTNUM_AGF] = {1, 1, 1};
const uint8_t *levels = pag ? pag->pagf_levels : fake_levels;
unsigned int min_free; unsigned int min_free;
ASSERT(mp->m_ag_maxlevels > 0);
/* space needed by-bno freespace btree */ /* space needed by-bno freespace btree */
min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1, min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
mp->m_ag_maxlevels); mp->m_ag_maxlevels);
/* space needed by-size freespace btree */ /* space needed by-size freespace btree */
min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1, min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
mp->m_ag_maxlevels); mp->m_ag_maxlevels);
/* space needed reverse mapping used space btree */ /* space needed reverse mapping used space btree */
if (xfs_sb_version_hasrmapbt(&mp->m_sb)) if (xfs_sb_version_hasrmapbt(&mp->m_sb))
min_free += min_t(unsigned int, min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
pag->pagf_levels[XFS_BTNUM_RMAPi] + 1, mp->m_rmap_maxlevels);
mp->m_rmap_maxlevels);
return min_free; return min_free;
} }

View File

@ -4561,7 +4561,7 @@ xfs_bmapi_convert_delalloc(
struct xfs_mount *mp = ip->i_mount; struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
struct xfs_bmalloca bma = { NULL }; struct xfs_bmalloca bma = { NULL };
u16 flags = 0; uint16_t flags = 0;
struct xfs_trans *tp; struct xfs_trans *tp;
int error; int error;
@ -5972,8 +5972,7 @@ xfs_bmap_insert_extents(
goto del_cursor; goto del_cursor;
} }
if (XFS_IS_CORRUPT(mp, if (XFS_IS_CORRUPT(mp, stop_fsb > got.br_startoff)) {
stop_fsb >= got.br_startoff + got.br_blockcount)) {
error = -EFSCORRUPTED; error = -EFSCORRUPTED;
goto del_cursor; goto del_cursor;
} }

View File

@ -724,3 +724,24 @@ xfs_dir2_namecheck(
/* There shouldn't be any slashes or nulls here */ /* There shouldn't be any slashes or nulls here */
return !memchr(name, '/', length) && !memchr(name, 0, length); return !memchr(name, '/', length) && !memchr(name, 0, length);
} }
xfs_dahash_t
xfs_dir2_hashname(
struct xfs_mount *mp,
struct xfs_name *name)
{
if (unlikely(xfs_sb_version_hasasciici(&mp->m_sb)))
return xfs_ascii_ci_hashname(name);
return xfs_da_hashname(name->name, name->len);
}
enum xfs_dacmp
xfs_dir2_compname(
struct xfs_da_args *args,
const unsigned char *name,
int len)
{
if (unlikely(xfs_sb_version_hasasciici(&args->dp->i_mount->m_sb)))
return xfs_ascii_ci_compname(args, name, len);
return xfs_da_compname(args, name, len);
}

View File

@ -175,6 +175,12 @@ extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
extern int xfs_dir2_sf_removename(struct xfs_da_args *args); extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
extern int xfs_dir2_sf_replace(struct xfs_da_args *args); extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
extern xfs_failaddr_t xfs_dir2_sf_verify(struct xfs_inode *ip); extern xfs_failaddr_t xfs_dir2_sf_verify(struct xfs_inode *ip);
int xfs_dir2_sf_entsize(struct xfs_mount *mp,
struct xfs_dir2_sf_hdr *hdr, int len);
void xfs_dir2_sf_put_ino(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *hdr,
struct xfs_dir2_sf_entry *sfep, xfs_ino_t ino);
void xfs_dir2_sf_put_ftype(struct xfs_mount *mp,
struct xfs_dir2_sf_entry *sfep, uint8_t ftype);
/* xfs_dir2_readdir.c */ /* xfs_dir2_readdir.c */
extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp, extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp,
@ -194,25 +200,8 @@ xfs_dir2_data_entsize(
return round_up(len, XFS_DIR2_DATA_ALIGN); return round_up(len, XFS_DIR2_DATA_ALIGN);
} }
static inline xfs_dahash_t xfs_dahash_t xfs_dir2_hashname(struct xfs_mount *mp, struct xfs_name *name);
xfs_dir2_hashname( enum xfs_dacmp xfs_dir2_compname(struct xfs_da_args *args,
struct xfs_mount *mp, const unsigned char *name, int len);
struct xfs_name *name)
{
if (unlikely(xfs_sb_version_hasasciici(&mp->m_sb)))
return xfs_ascii_ci_hashname(name);
return xfs_da_hashname(name->name, name->len);
}
static inline enum xfs_dacmp
xfs_dir2_compname(
struct xfs_da_args *args,
const unsigned char *name,
int len)
{
if (unlikely(xfs_sb_version_hasasciici(&args->dp->i_mount->m_sb)))
return xfs_ascii_ci_compname(args, name, len);
return xfs_da_compname(args, name, len);
}
#endif /* __XFS_DIR2_PRIV_H__ */ #endif /* __XFS_DIR2_PRIV_H__ */

View File

@ -37,7 +37,7 @@ static void xfs_dir2_sf_check(xfs_da_args_t *args);
static void xfs_dir2_sf_toino4(xfs_da_args_t *args); static void xfs_dir2_sf_toino4(xfs_da_args_t *args);
static void xfs_dir2_sf_toino8(xfs_da_args_t *args); static void xfs_dir2_sf_toino8(xfs_da_args_t *args);
static int int
xfs_dir2_sf_entsize( xfs_dir2_sf_entsize(
struct xfs_mount *mp, struct xfs_mount *mp,
struct xfs_dir2_sf_hdr *hdr, struct xfs_dir2_sf_hdr *hdr,
@ -84,7 +84,7 @@ xfs_dir2_sf_get_ino(
return get_unaligned_be64(from) & XFS_MAXINUMBER; return get_unaligned_be64(from) & XFS_MAXINUMBER;
} }
static void void
xfs_dir2_sf_put_ino( xfs_dir2_sf_put_ino(
struct xfs_mount *mp, struct xfs_mount *mp,
struct xfs_dir2_sf_hdr *hdr, struct xfs_dir2_sf_hdr *hdr,
@ -145,7 +145,7 @@ xfs_dir2_sf_get_ftype(
return XFS_DIR3_FT_UNKNOWN; return XFS_DIR3_FT_UNKNOWN;
} }
static void void
xfs_dir2_sf_put_ftype( xfs_dir2_sf_put_ftype(
struct xfs_mount *mp, struct xfs_mount *mp,
struct xfs_dir2_sf_entry *sfep, struct xfs_dir2_sf_entry *sfep,

View File

@ -2909,3 +2909,67 @@ xfs_ialloc_setup_geometry(
else else
igeo->ialloc_align = 0; igeo->ialloc_align = 0;
} }
/* Compute the location of the root directory inode that is laid out by mkfs. */
xfs_ino_t
xfs_ialloc_calc_rootino(
struct xfs_mount *mp,
int sunit)
{
struct xfs_ino_geometry *igeo = M_IGEO(mp);
xfs_agblock_t first_bno;
/*
* Pre-calculate the geometry of AG 0. We know what it looks like
* because libxfs knows how to create allocation groups now.
*
* first_bno is the first block in which mkfs could possibly have
* allocated the root directory inode, once we factor in the metadata
* that mkfs formats before it. Namely, the four AG headers...
*/
first_bno = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
/* ...the two free space btree roots... */
first_bno += 2;
/* ...the inode btree root... */
first_bno += 1;
/* ...the initial AGFL... */
first_bno += xfs_alloc_min_freelist(mp, NULL);
/* ...the free inode btree root... */
if (xfs_sb_version_hasfinobt(&mp->m_sb))
first_bno++;
/* ...the reverse mapping btree root... */
if (xfs_sb_version_hasrmapbt(&mp->m_sb))
first_bno++;
/* ...the reference count btree... */
if (xfs_sb_version_hasreflink(&mp->m_sb))
first_bno++;
/*
* ...and the log, if it is allocated in the first allocation group.
*
* This can happen with filesystems that only have a single
* allocation group, or very odd geometries created by old mkfs
* versions on very small filesystems.
*/
if (mp->m_sb.sb_logstart &&
XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == 0)
first_bno += mp->m_sb.sb_logblocks;
/*
* Now round first_bno up to whatever allocation alignment is given
* by the filesystem or was passed in.
*/
if (xfs_sb_version_hasdalign(&mp->m_sb) && igeo->ialloc_align > 0)
first_bno = roundup(first_bno, sunit);
else if (xfs_sb_version_hasalign(&mp->m_sb) &&
mp->m_sb.sb_inoalignmt > 1)
first_bno = roundup(first_bno, mp->m_sb.sb_inoalignmt);
return XFS_AGINO_TO_INO(mp, 0, XFS_AGB_TO_AGINO(mp, first_bno));
}

View File

@ -152,5 +152,6 @@ int xfs_inobt_insert_rec(struct xfs_btree_cur *cur, uint16_t holemask,
int xfs_ialloc_cluster_alignment(struct xfs_mount *mp); int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
void xfs_ialloc_setup_geometry(struct xfs_mount *mp); void xfs_ialloc_setup_geometry(struct xfs_mount *mp);
xfs_ino_t xfs_ialloc_calc_rootino(struct xfs_mount *mp, int sunit);
#endif /* __XFS_IALLOC_H__ */ #endif /* __XFS_IALLOC_H__ */

View File

@ -196,6 +196,24 @@ xfs_calc_inode_chunk_res(
return res; return res;
} }
/*
* Per-extent log reservation for the btree changes involved in freeing or
* allocating a realtime extent. We have to be able to log as many rtbitmap
* blocks as needed to mark inuse MAXEXTLEN blocks' worth of realtime extents,
* as well as the realtime summary block.
*/
static unsigned int
xfs_rtalloc_log_count(
struct xfs_mount *mp,
unsigned int num_ops)
{
unsigned int blksz = XFS_FSB_TO_B(mp, 1);
unsigned int rtbmp_bytes;
rtbmp_bytes = (MAXEXTLEN / mp->m_sb.sb_rextsize) / NBBY;
return (howmany(rtbmp_bytes, blksz) + 1) * num_ops;
}
/* /*
* Various log reservation values. * Various log reservation values.
* *
@ -218,13 +236,21 @@ xfs_calc_inode_chunk_res(
/* /*
* In a write transaction we can allocate a maximum of 2 * In a write transaction we can allocate a maximum of 2
* extents. This gives: * extents. This gives (t1):
* the inode getting the new extents: inode size * the inode getting the new extents: inode size
* the inode's bmap btree: max depth * block size * the inode's bmap btree: max depth * block size
* the agfs of the ags from which the extents are allocated: 2 * sector * the agfs of the ags from which the extents are allocated: 2 * sector
* the superblock free block counter: sector size * the superblock free block counter: sector size
* the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
* And the bmap_finish transaction can free bmap blocks in a join: * Or, if we're writing to a realtime file (t2):
* the inode getting the new extents: inode size
* the inode's bmap btree: max depth * block size
* the agfs of the ags from which the extents are allocated: 2 * sector
* the superblock free block counter: sector size
* the realtime bitmap: ((MAXEXTLEN / rtextsize) / NBBY) bytes
* the realtime summary: 1 block
* the allocation btrees: 2 trees * (2 * max depth - 1) * block size
* And the bmap_finish transaction can free bmap blocks in a join (t3):
* the agfs of the ags containing the blocks: 2 * sector size * the agfs of the ags containing the blocks: 2 * sector size
* the agfls of the ags containing the blocks: 2 * sector size * the agfls of the ags containing the blocks: 2 * sector size
* the super block free block counter: sector size * the super block free block counter: sector size
@ -234,40 +260,72 @@ STATIC uint
xfs_calc_write_reservation( xfs_calc_write_reservation(
struct xfs_mount *mp) struct xfs_mount *mp)
{ {
return XFS_DQUOT_LOGRES(mp) + unsigned int t1, t2, t3;
max((xfs_calc_inode_res(mp, 1) + unsigned int blksz = XFS_FSB_TO_B(mp, 1);
t1 = xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), blksz) +
xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
if (xfs_sb_version_hasrealtime(&mp->m_sb)) {
t2 = xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
XFS_FSB_TO_B(mp, 1)) + blksz) +
xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 1), blksz) +
XFS_FSB_TO_B(mp, 1))), xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), blksz);
(xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + } else {
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), t2 = 0;
XFS_FSB_TO_B(mp, 1)))); }
t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
} }
/* /*
* In truncating a file we free up to two extents at once. We can modify: * In truncating a file we free up to two extents at once. We can modify (t1):
* the inode being truncated: inode size * the inode being truncated: inode size
* the inode's bmap btree: (max depth + 1) * block size * the inode's bmap btree: (max depth + 1) * block size
* And the bmap_finish transaction can free the blocks and bmap blocks: * And the bmap_finish transaction can free the blocks and bmap blocks (t2):
* the agf for each of the ags: 4 * sector size * the agf for each of the ags: 4 * sector size
* the agfl for each of the ags: 4 * sector size * the agfl for each of the ags: 4 * sector size
* the super block to reflect the freed blocks: sector size * the super block to reflect the freed blocks: sector size
* worst case split in allocation btrees per extent assuming 4 extents: * worst case split in allocation btrees per extent assuming 4 extents:
* 4 exts * 2 trees * (2 * max depth - 1) * block size * 4 exts * 2 trees * (2 * max depth - 1) * block size
* Or, if it's a realtime file (t3):
* the agf for each of the ags: 2 * sector size
* the agfl for each of the ags: 2 * sector size
* the super block to reflect the freed blocks: sector size
* the realtime bitmap: 2 exts * ((MAXEXTLEN / rtextsize) / NBBY) bytes
* the realtime summary: 2 exts * 1 block
* worst case split in allocation btrees per extent assuming 2 extents:
* 2 exts * 2 trees * (2 * max depth - 1) * block size
*/ */
STATIC uint STATIC uint
xfs_calc_itruncate_reservation( xfs_calc_itruncate_reservation(
struct xfs_mount *mp) struct xfs_mount *mp)
{ {
return XFS_DQUOT_LOGRES(mp) + unsigned int t1, t2, t3;
max((xfs_calc_inode_res(mp, 1) + unsigned int blksz = XFS_FSB_TO_B(mp, 1);
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
XFS_FSB_TO_B(mp, 1))), t1 = xfs_calc_inode_res(mp, 1) +
(xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz);
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4),
XFS_FSB_TO_B(mp, 1)))); t2 = xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), blksz);
if (xfs_sb_version_hasrealtime(&mp->m_sb)) {
t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 2), blksz) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
} else {
t3 = 0;
}
return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
} }
/* /*

View File

@ -992,6 +992,7 @@ xfs_prepare_shift(
struct xfs_inode *ip, struct xfs_inode *ip,
loff_t offset) loff_t offset)
{ {
struct xfs_mount *mp = ip->i_mount;
int error; int error;
/* /*
@ -1004,6 +1005,17 @@ xfs_prepare_shift(
return error; return error;
} }
/*
* Shift operations must stabilize the start block offset boundary along
* with the full range of the operation. If we don't, a COW writeback
* completion could race with an insert, front merge with the start
* extent (after split) during the shift and corrupt the file. Start
* with the block just prior to the start to stabilize the boundary.
*/
offset = round_down(offset, 1 << mp->m_sb.sb_blocklog);
if (offset)
offset -= (1 << mp->m_sb.sb_blocklog);
/* /*
* Writeback and invalidate cache for the remainder of the file as we're * Writeback and invalidate cache for the remainder of the file as we're
* about to shift down every extent from offset to EOF. * about to shift down every extent from offset to EOF.

View File

@ -956,7 +956,7 @@ xfs_buf_item_relse(
struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_buf_log_item *bip = bp->b_log_item;
trace_xfs_buf_item_relse(bp, _RET_IP_); trace_xfs_buf_item_relse(bp, _RET_IP_);
ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL)); ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));
bp->b_log_item = NULL; bp->b_log_item = NULL;
if (list_empty(&bp->b_li_list)) if (list_empty(&bp->b_li_list))

View File

@ -31,7 +31,7 @@
#include "xfs_reflink.h" #include "xfs_reflink.h"
#include "xfs_extent_busy.h" #include "xfs_extent_busy.h"
#include "xfs_health.h" #include "xfs_health.h"
#include "xfs_trace.h"
static DEFINE_MUTEX(xfs_uuid_table_mutex); static DEFINE_MUTEX(xfs_uuid_table_mutex);
static int xfs_uuid_table_size; static int xfs_uuid_table_size;
@ -360,66 +360,119 @@ release_buf:
} }
/* /*
* Update alignment values based on mount options and sb values * If the sunit/swidth change would move the precomputed root inode value, we
* must reject the ondisk change because repair will stumble over that.
* However, we allow the mount to proceed because we never rejected this
* combination before. Returns true to update the sb, false otherwise.
*/
static inline int
xfs_check_new_dalign(
struct xfs_mount *mp,
int new_dalign,
bool *update_sb)
{
struct xfs_sb *sbp = &mp->m_sb;
xfs_ino_t calc_ino;
calc_ino = xfs_ialloc_calc_rootino(mp, new_dalign);
trace_xfs_check_new_dalign(mp, new_dalign, calc_ino);
if (sbp->sb_rootino == calc_ino) {
*update_sb = true;
return 0;
}
xfs_warn(mp,
"Cannot change stripe alignment; would require moving root inode.");
/*
* XXX: Next time we add a new incompat feature, this should start
* returning -EINVAL to fail the mount. Until then, spit out a warning
* that we're ignoring the administrator's instructions.
*/
xfs_warn(mp, "Skipping superblock stripe alignment update.");
*update_sb = false;
return 0;
}
/*
* If we were provided with new sunit/swidth values as mount options, make sure
* that they pass basic alignment and superblock feature checks, and convert
* them into the same units (FSB) that everything else expects. This step
* /must/ be done before computing the inode geometry.
*/ */
STATIC int STATIC int
xfs_update_alignment(xfs_mount_t *mp) xfs_validate_new_dalign(
struct xfs_mount *mp)
{ {
xfs_sb_t *sbp = &(mp->m_sb); if (mp->m_dalign == 0)
return 0;
/*
* If stripe unit and stripe width are not multiples
* of the fs blocksize turn off alignment.
*/
if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
(BBTOB(mp->m_swidth) & mp->m_blockmask)) {
xfs_warn(mp,
"alignment check failed: sunit/swidth vs. blocksize(%d)",
mp->m_sb.sb_blocksize);
return -EINVAL;
} else {
/*
* Convert the stripe unit and width to FSBs.
*/
mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
if (mp->m_dalign && (mp->m_sb.sb_agblocks % mp->m_dalign)) {
xfs_warn(mp,
"alignment check failed: sunit/swidth vs. agsize(%d)",
mp->m_sb.sb_agblocks);
return -EINVAL;
} else if (mp->m_dalign) {
mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
} else {
xfs_warn(mp,
"alignment check failed: sunit(%d) less than bsize(%d)",
mp->m_dalign, mp->m_sb.sb_blocksize);
return -EINVAL;
}
}
if (!xfs_sb_version_hasdalign(&mp->m_sb)) {
xfs_warn(mp,
"cannot change alignment: superblock does not support data alignment");
return -EINVAL;
}
return 0;
}
/* Update alignment values based on mount options and sb values. */
STATIC int
xfs_update_alignment(
struct xfs_mount *mp)
{
struct xfs_sb *sbp = &mp->m_sb;
if (mp->m_dalign) { if (mp->m_dalign) {
/* bool update_sb;
* If stripe unit and stripe width are not multiples int error;
* of the fs blocksize turn off alignment.
*/
if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
(BBTOB(mp->m_swidth) & mp->m_blockmask)) {
xfs_warn(mp,
"alignment check failed: sunit/swidth vs. blocksize(%d)",
sbp->sb_blocksize);
return -EINVAL;
} else {
/*
* Convert the stripe unit and width to FSBs.
*/
mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
xfs_warn(mp,
"alignment check failed: sunit/swidth vs. agsize(%d)",
sbp->sb_agblocks);
return -EINVAL;
} else if (mp->m_dalign) {
mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
} else {
xfs_warn(mp,
"alignment check failed: sunit(%d) less than bsize(%d)",
mp->m_dalign, sbp->sb_blocksize);
return -EINVAL;
}
}
/* if (sbp->sb_unit == mp->m_dalign &&
* Update superblock with new values sbp->sb_width == mp->m_swidth)
* and log changes return 0;
*/
if (xfs_sb_version_hasdalign(sbp)) { error = xfs_check_new_dalign(mp, mp->m_dalign, &update_sb);
if (sbp->sb_unit != mp->m_dalign) { if (error || !update_sb)
sbp->sb_unit = mp->m_dalign; return error;
mp->m_update_sb = true;
} sbp->sb_unit = mp->m_dalign;
if (sbp->sb_width != mp->m_swidth) { sbp->sb_width = mp->m_swidth;
sbp->sb_width = mp->m_swidth; mp->m_update_sb = true;
mp->m_update_sb = true;
}
} else {
xfs_warn(mp,
"cannot change alignment: superblock does not support data alignment");
return -EINVAL;
}
} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN && } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
xfs_sb_version_hasdalign(&mp->m_sb)) { xfs_sb_version_hasdalign(&mp->m_sb)) {
mp->m_dalign = sbp->sb_unit; mp->m_dalign = sbp->sb_unit;
mp->m_swidth = sbp->sb_width; mp->m_swidth = sbp->sb_width;
} }
return 0; return 0;
@ -648,12 +701,12 @@ xfs_mountfs(
} }
/* /*
* Check if sb_agblocks is aligned at stripe boundary * If we were given new sunit/swidth options, do some basic validation
* If sb_agblocks is NOT aligned turn off m_dalign since * checks and convert the incore dalign and swidth values to the
* allocator alignment is within an ag, therefore ag has * same units (FSB) that everything else uses. This /must/ happen
* to be aligned at stripe boundary. * before computing the inode geometry.
*/ */
error = xfs_update_alignment(mp); error = xfs_validate_new_dalign(mp);
if (error) if (error)
goto out; goto out;
@ -664,6 +717,17 @@ xfs_mountfs(
xfs_rmapbt_compute_maxlevels(mp); xfs_rmapbt_compute_maxlevels(mp);
xfs_refcountbt_compute_maxlevels(mp); xfs_refcountbt_compute_maxlevels(mp);
/*
* Check if sb_agblocks is aligned at stripe boundary. If sb_agblocks
* is NOT aligned turn off m_dalign since allocator alignment is within
* an ag, therefore ag has to be aligned at stripe boundary. Note that
* we must compute the free space and rmap btree geometry before doing
* this.
*/
error = xfs_update_alignment(mp);
if (error)
goto out;
/* enable fail_at_unmount as default */ /* enable fail_at_unmount as default */
mp->m_fail_unmount = true; mp->m_fail_unmount = true;

View File

@ -3573,6 +3573,27 @@ DEFINE_KMEM_EVENT(kmem_alloc_large);
DEFINE_KMEM_EVENT(kmem_realloc); DEFINE_KMEM_EVENT(kmem_realloc);
DEFINE_KMEM_EVENT(kmem_zone_alloc); DEFINE_KMEM_EVENT(kmem_zone_alloc);
TRACE_EVENT(xfs_check_new_dalign,
TP_PROTO(struct xfs_mount *mp, int new_dalign, xfs_ino_t calc_rootino),
TP_ARGS(mp, new_dalign, calc_rootino),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(int, new_dalign)
__field(xfs_ino_t, sb_rootino)
__field(xfs_ino_t, calc_rootino)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->new_dalign = new_dalign;
__entry->sb_rootino = mp->m_sb.sb_rootino;
__entry->calc_rootino = calc_rootino;
),
TP_printk("dev %d:%d new_dalign %d sb_rootino %llu calc_rootino %llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->new_dalign, __entry->sb_rootino,
__entry->calc_rootino)
)
#endif /* _TRACE_XFS_H */ #endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH #undef TRACE_INCLUDE_PATH