Merge tag 'realtime-quotas-6.13_2024-11-05' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into staging-merge

xfs: enable quota for realtime volumes [v5.5 08/10]

At some point, I realized that I've refactored enough of the quota code
in XFS that I should evaluate whether or not quota actually works on
realtime volumes.  It turns out that it nearly works: the only broken
pieces are chown and delayed allocation, and reporting of project
quotas in the statvfs output for projinherit+rtinherit directories.

Fix these things and we can have realtime quotas again after 20 years.

With a bit of luck, this should all go splendidly.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
commit 8ca118e17a (Carlos Maiolino, 2024-11-12 11:02:25 +01:00)
11 changed files with 182 additions and 76 deletions


@@ -277,6 +277,25 @@ xfs_qm_init_dquot_blk(
xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
}
static void
xfs_dquot_set_prealloc(
struct xfs_dquot_pre *pre,
const struct xfs_dquot_res *res)
{
xfs_qcnt_t space;
pre->q_prealloc_hi_wmark = res->hardlimit;
pre->q_prealloc_lo_wmark = res->softlimit;
space = div_u64(pre->q_prealloc_hi_wmark, 100);
if (!pre->q_prealloc_lo_wmark)
pre->q_prealloc_lo_wmark = space * 95;
pre->q_low_space[XFS_QLOWSP_1_PCNT] = space;
pre->q_low_space[XFS_QLOWSP_3_PCNT] = space * 3;
pre->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5;
}
/*
* Initialize the dynamic speculative preallocation thresholds. The lo/hi
* watermarks correspond to the soft and hard limits by default. If a soft limit
@@ -285,22 +304,8 @@ xfs_qm_init_dquot_blk(
void
xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp)
{
uint64_t space;
dqp->q_prealloc_hi_wmark = dqp->q_blk.hardlimit;
dqp->q_prealloc_lo_wmark = dqp->q_blk.softlimit;
if (!dqp->q_prealloc_lo_wmark) {
dqp->q_prealloc_lo_wmark = dqp->q_prealloc_hi_wmark;
do_div(dqp->q_prealloc_lo_wmark, 100);
dqp->q_prealloc_lo_wmark *= 95;
}
space = dqp->q_prealloc_hi_wmark;
do_div(space, 100);
dqp->q_low_space[XFS_QLOWSP_1_PCNT] = space;
dqp->q_low_space[XFS_QLOWSP_3_PCNT] = space * 3;
dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5;
xfs_dquot_set_prealloc(&dqp->q_blk_prealloc, &dqp->q_blk);
xfs_dquot_set_prealloc(&dqp->q_rtb_prealloc, &dqp->q_rtb);
}
/*

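For concreteness, the watermark arithmetic that the new xfs_dquot_set_prealloc() helper applies to each resource (data blocks and now realtime blocks) can be sketched as standalone userspace C; the limit values below are invented for illustration and are not taken from the patch.

/* Standalone sketch of the prealloc watermark arithmetic; limits are illustrative. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t hardlimit = 100000;	/* hypothetical hard limit, in blocks */
	uint64_t softlimit = 0;		/* no soft limit configured */
	uint64_t hi = hardlimit;	/* q_prealloc_hi_wmark */
	uint64_t lo = softlimit;	/* q_prealloc_lo_wmark */
	uint64_t space = hi / 100;	/* 1% of the hard limit */

	if (!lo)
		lo = space * 95;	/* default lo wmark: 95% of the hard limit */

	printf("hi %" PRIu64 ", lo %" PRIu64 "\n", hi, lo);
	printf("low-space 1%%/3%%/5%%: %" PRIu64 "/%" PRIu64 "/%" PRIu64 "\n",
			space, space * 3, space * 5);
	return 0;
}

With a 100000-block hard limit this gives a lo watermark of 95000 blocks and low-space thresholds of 1000, 3000 and 5000 blocks; after this change the same numbers are kept separately in q_blk_prealloc and q_rtb_prealloc.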

@@ -56,6 +56,12 @@ xfs_dquot_res_over_limits(
return false;
}
struct xfs_dquot_pre {
xfs_qcnt_t q_prealloc_lo_wmark;
xfs_qcnt_t q_prealloc_hi_wmark;
int64_t q_low_space[XFS_QLOWSP_MAX];
};
/*
* The incore dquot structure
*/
@@ -76,9 +82,9 @@ struct xfs_dquot {
struct xfs_dq_logitem q_logitem;
xfs_qcnt_t q_prealloc_lo_wmark;
xfs_qcnt_t q_prealloc_hi_wmark;
int64_t q_low_space[XFS_QLOWSP_MAX];
struct xfs_dquot_pre q_blk_prealloc;
struct xfs_dquot_pre q_rtb_prealloc;
struct mutex q_qlock;
struct completion q_flush;
atomic_t q_pincount;
@@ -192,7 +198,11 @@ static inline bool xfs_dquot_lowsp(struct xfs_dquot *dqp)
int64_t freesp;
freesp = dqp->q_blk.hardlimit - dqp->q_blk.reserved;
if (freesp < dqp->q_low_space[XFS_QLOWSP_1_PCNT])
if (freesp < dqp->q_blk_prealloc.q_low_space[XFS_QLOWSP_1_PCNT])
return true;
freesp = dqp->q_rtb.hardlimit - dqp->q_rtb.reserved;
if (freesp < dqp->q_rtb_prealloc.q_low_space[XFS_QLOWSP_1_PCNT])
return true;
return false;


@@ -353,16 +353,26 @@ xfs_quota_need_throttle(
xfs_fsblock_t alloc_blocks)
{
struct xfs_dquot *dq = xfs_inode_dquot(ip, type);
struct xfs_dquot_res *res;
struct xfs_dquot_pre *pre;
if (!dq || !xfs_this_quota_on(ip->i_mount, type))
return false;
if (XFS_IS_REALTIME_INODE(ip)) {
res = &dq->q_rtb;
pre = &dq->q_rtb_prealloc;
} else {
res = &dq->q_blk;
pre = &dq->q_blk_prealloc;
}
/* no hi watermark, no throttle */
if (!dq->q_prealloc_hi_wmark)
if (!pre->q_prealloc_hi_wmark)
return false;
/* under the lo watermark, no throttle */
if (dq->q_blk.reserved + alloc_blocks < dq->q_prealloc_lo_wmark)
if (res->reserved + alloc_blocks < pre->q_prealloc_lo_wmark)
return false;
return true;
@@ -377,22 +387,35 @@ xfs_quota_calc_throttle(
int64_t *qfreesp)
{
struct xfs_dquot *dq = xfs_inode_dquot(ip, type);
struct xfs_dquot_res *res;
struct xfs_dquot_pre *pre;
int64_t freesp;
int shift = 0;
if (!dq) {
res = NULL;
pre = NULL;
} else if (XFS_IS_REALTIME_INODE(ip)) {
res = &dq->q_rtb;
pre = &dq->q_rtb_prealloc;
} else {
res = &dq->q_blk;
pre = &dq->q_blk_prealloc;
}
/* no dq, or over hi wmark, squash the prealloc completely */
if (!dq || dq->q_blk.reserved >= dq->q_prealloc_hi_wmark) {
if (!res || res->reserved >= pre->q_prealloc_hi_wmark) {
*qblocks = 0;
*qfreesp = 0;
return;
}
freesp = dq->q_prealloc_hi_wmark - dq->q_blk.reserved;
if (freesp < dq->q_low_space[XFS_QLOWSP_5_PCNT]) {
freesp = pre->q_prealloc_hi_wmark - res->reserved;
if (freesp < pre->q_low_space[XFS_QLOWSP_5_PCNT]) {
shift = 2;
if (freesp < dq->q_low_space[XFS_QLOWSP_3_PCNT])
if (freesp < pre->q_low_space[XFS_QLOWSP_3_PCNT])
shift += 2;
if (freesp < dq->q_low_space[XFS_QLOWSP_1_PCNT])
if (freesp < pre->q_low_space[XFS_QLOWSP_1_PCNT])
shift += 2;
}
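
The escalating shift above is what scales speculative preallocation back as a dquot approaches its limit; a small userspace sketch of the same arithmetic (reserved counts are invented for illustration) shows how the 5%/3%/1% thresholds stack:

/* Sketch of the throttle-shift escalation; all numbers are illustrative. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t hi_wmark = 100000;			/* hypothetical hard limit */
	uint64_t low[3] = { 1000, 3000, 5000 };		/* 1%, 3%, 5% thresholds */
	uint64_t reserved = 98500;			/* hypothetical reserved blocks */
	int64_t freesp = hi_wmark - reserved;		/* 1500 blocks of headroom */
	int shift = 0;

	if (freesp < (int64_t)low[2]) {
		shift = 2;
		if (freesp < (int64_t)low[1])
			shift += 2;
		if (freesp < (int64_t)low[0])
			shift += 2;
	}
	/* 1500 is under the 5% and 3% thresholds, so shift ends up as 4. */
	printf("freesp %" PRId64 " -> shift %d\n", freesp, shift);
	return 0;
}

The only behavioural change in this hunk is which counters feed that calculation: realtime inodes are now judged against q_rtb/q_rtb_prealloc instead of the data-device numbers.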


@@ -1351,8 +1351,8 @@ xfs_qm_dqusage_adjust(
void *data)
{
struct xfs_inode *ip;
xfs_qcnt_t nblks;
xfs_filblks_t rtblks = 0; /* total rt blks */
xfs_filblks_t nblks, rtblks;
unsigned int lock_mode;
int error;
ASSERT(XFS_IS_QUOTA_ON(mp));
@@ -1393,18 +1393,17 @@ xfs_qm_dqusage_adjust(
ASSERT(ip->i_delayed_blks == 0);
lock_mode = xfs_ilock_data_map_shared(ip);
if (XFS_IS_REALTIME_INODE(ip)) {
struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
if (error)
if (error) {
xfs_iunlock(ip, lock_mode);
goto error0;
xfs_bmap_count_leaves(ifp, &rtblks);
}
}
nblks = (xfs_qcnt_t)ip->i_nblocks - rtblks;
xfs_inode_count_blocks(tp, ip, &nblks, &rtblks);
xfs_iflags_clear(ip, XFS_IQUOTAUNCHECKED);
xfs_iunlock(ip, lock_mode);
/*
* Add the (disk blocks and inode) resources occupied by this
@@ -1664,10 +1663,11 @@ xfs_qm_mount_quotas(
uint sbf;
/*
* If quotas on realtime volumes is not supported, we disable
* quotas immediately.
* If quotas on realtime volumes is not supported, disable quotas
* immediately. We only support rtquota if rtgroups are enabled to
* avoid problems with older kernels.
*/
if (mp->m_sb.sb_rextents) {
if (mp->m_sb.sb_rextents && !xfs_has_rtgroups(mp)) {
xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
mp->m_qflags = 0;
goto write_changes;
@@ -2043,9 +2043,8 @@ xfs_qm_vop_chown(
struct xfs_dquot *newdq)
{
struct xfs_dquot *prevdq;
uint bfield = XFS_IS_REALTIME_INODE(ip) ?
XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
xfs_filblks_t dblocks, rblocks;
bool isrt = XFS_IS_REALTIME_INODE(ip);
xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
ASSERT(XFS_IS_QUOTA_ON(ip->i_mount));
@@ -2056,11 +2055,17 @@ xfs_qm_vop_chown(
ASSERT(prevdq);
ASSERT(prevdq != newdq);
xfs_trans_mod_ino_dquot(tp, ip, prevdq, bfield, -(ip->i_nblocks));
xfs_inode_count_blocks(tp, ip, &dblocks, &rblocks);
xfs_trans_mod_ino_dquot(tp, ip, prevdq, XFS_TRANS_DQ_BCOUNT,
-(xfs_qcnt_t)dblocks);
xfs_trans_mod_ino_dquot(tp, ip, prevdq, XFS_TRANS_DQ_RTBCOUNT,
-(xfs_qcnt_t)rblocks);
xfs_trans_mod_ino_dquot(tp, ip, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
/* the sparkling new dquot */
xfs_trans_mod_ino_dquot(tp, ip, newdq, bfield, ip->i_nblocks);
xfs_trans_mod_ino_dquot(tp, ip, newdq, XFS_TRANS_DQ_BCOUNT, dblocks);
xfs_trans_mod_ino_dquot(tp, ip, newdq, XFS_TRANS_DQ_RTBCOUNT, rblocks);
xfs_trans_mod_ino_dquot(tp, ip, newdq, XFS_TRANS_DQ_ICOUNT, 1);
/*
@@ -2070,7 +2075,8 @@ xfs_qm_vop_chown(
* (having already bumped up the real counter) so that we don't have
* any reservation to give back when we commit.
*/
xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_RES_BLKS,
xfs_trans_mod_dquot(tp, newdq,
isrt ? XFS_TRANS_DQ_RES_RTBLKS : XFS_TRANS_DQ_RES_BLKS,
-ip->i_delayed_blks);
/*
@@ -2082,8 +2088,13 @@ xfs_qm_vop_chown(
*/
tp->t_flags |= XFS_TRANS_DIRTY;
xfs_dqlock(prevdq);
ASSERT(prevdq->q_blk.reserved >= ip->i_delayed_blks);
prevdq->q_blk.reserved -= ip->i_delayed_blks;
if (isrt) {
ASSERT(prevdq->q_rtb.reserved >= ip->i_delayed_blks);
prevdq->q_rtb.reserved -= ip->i_delayed_blks;
} else {
ASSERT(prevdq->q_blk.reserved >= ip->i_delayed_blks);
prevdq->q_blk.reserved -= ip->i_delayed_blks;
}
xfs_dqunlock(prevdq);
/*
@@ -2168,6 +2179,8 @@ xfs_inode_near_dquot_enforcement(
xfs_dqtype_t type)
{
struct xfs_dquot *dqp;
struct xfs_dquot_res *res;
struct xfs_dquot_pre *pre;
int64_t freesp;
/* We only care for quotas that are enabled and enforced. */
@@ -2176,21 +2189,30 @@ xfs_inode_near_dquot_enforcement(
return false;
if (xfs_dquot_res_over_limits(&dqp->q_ino) ||
xfs_dquot_res_over_limits(&dqp->q_blk) ||
xfs_dquot_res_over_limits(&dqp->q_rtb))
return true;
if (XFS_IS_REALTIME_INODE(ip)) {
res = &dqp->q_rtb;
pre = &dqp->q_rtb_prealloc;
} else {
res = &dqp->q_blk;
pre = &dqp->q_blk_prealloc;
}
/* For space on the data device, check the various thresholds. */
if (!dqp->q_prealloc_hi_wmark)
if (!pre->q_prealloc_hi_wmark)
return false;
if (dqp->q_blk.reserved < dqp->q_prealloc_lo_wmark)
if (res->reserved < pre->q_prealloc_lo_wmark)
return false;
if (dqp->q_blk.reserved >= dqp->q_prealloc_hi_wmark)
if (res->reserved >= pre->q_prealloc_hi_wmark)
return true;
freesp = dqp->q_prealloc_hi_wmark - dqp->q_blk.reserved;
if (freesp < dqp->q_low_space[XFS_QLOWSP_5_PCNT])
freesp = pre->q_prealloc_hi_wmark - res->reserved;
if (freesp < pre->q_low_space[XFS_QLOWSP_5_PCNT])
return true;
return false;


@@ -19,18 +19,24 @@
STATIC void
xfs_fill_statvfs_from_dquot(
struct kstatfs *statp,
struct xfs_inode *ip,
struct xfs_dquot *dqp)
{
struct xfs_dquot_res *blkres = &dqp->q_blk;
uint64_t limit;
limit = dqp->q_blk.softlimit ?
dqp->q_blk.softlimit :
dqp->q_blk.hardlimit;
if (XFS_IS_REALTIME_MOUNT(ip->i_mount) &&
(ip->i_diflags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME)))
blkres = &dqp->q_rtb;
limit = blkres->softlimit ?
blkres->softlimit :
blkres->hardlimit;
if (limit && statp->f_blocks > limit) {
statp->f_blocks = limit;
statp->f_bfree = statp->f_bavail =
(statp->f_blocks > dqp->q_blk.reserved) ?
(statp->f_blocks - dqp->q_blk.reserved) : 0;
(statp->f_blocks > blkres->reserved) ?
(statp->f_blocks - blkres->reserved) : 0;
}
limit = dqp->q_ino.softlimit ?
@@ -61,7 +67,7 @@ xfs_qm_statvfs(
struct xfs_dquot *dqp;
if (!xfs_qm_dqget(mp, ip->i_projid, XFS_DQTYPE_PROJ, false, &dqp)) {
xfs_fill_statvfs_from_dquot(statp, dqp);
xfs_fill_statvfs_from_dquot(statp, ip, dqp);
xfs_qm_dqput(dqp);
}
}
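
The user-visible effect of the xfs_fill_statvfs_from_dquot() change is that, with project quota accounting and enforcement enabled, statfs()/statvfs() on a projinherit+rtinherit directory (or a realtime file) on a realtime mount now reports the project's realtime block limits instead of the data-device ones. A minimal userspace check might look like the following; the mount point is hypothetical:

/* Minimal statvfs probe; /mnt/xfs/rtproj stands in for a projinherit+rtinherit dir. */
#include <stdio.h>
#include <sys/statvfs.h>

int main(void)
{
	struct statvfs sv;

	if (statvfs("/mnt/xfs/rtproj", &sv) != 0) {
		perror("statvfs");
		return 1;
	}
	/* With project quota enforced, these now reflect the rt quota limits. */
	printf("blocks %llu, bfree %llu, bavail %llu (frsize %lu)\n",
			(unsigned long long)sv.f_blocks,
			(unsigned long long)sv.f_bfree,
			(unsigned long long)sv.f_bavail,
			(unsigned long)sv.f_frsize);
	return 0;
}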


@@ -130,6 +130,7 @@ extern void xfs_qm_mount_quotas(struct xfs_mount *);
extern void xfs_qm_unmount(struct xfs_mount *);
extern void xfs_qm_unmount_quotas(struct xfs_mount *);
bool xfs_inode_near_dquot_enforcement(struct xfs_inode *ip, xfs_dqtype_t type);
int xfs_quota_reserve_blkres(struct xfs_inode *ip, int64_t blocks);
# ifdef CONFIG_XFS_LIVE_HOOKS
void xfs_trans_mod_ino_dquot(struct xfs_trans *tp, struct xfs_inode *ip,
@@ -209,6 +210,11 @@ xfs_trans_reserve_quota_icreate(struct xfs_trans *tp, struct xfs_dquot *udqp,
#define xfs_qm_unmount_quotas(mp)
#define xfs_inode_near_dquot_enforcement(ip, type) (false)
static inline int xfs_quota_reserve_blkres(struct xfs_inode *ip, int64_t blocks)
{
return 0;
}
# ifdef CONFIG_XFS_LIVE_HOOKS
# define xfs_dqtrx_hook_enable() ((void)0)
# define xfs_dqtrx_hook_disable() ((void)0)
@@ -216,12 +222,6 @@ xfs_trans_reserve_quota_icreate(struct xfs_trans *tp, struct xfs_dquot *udqp,
#endif /* CONFIG_XFS_QUOTA */
static inline int
xfs_quota_reserve_blkres(struct xfs_inode *ip, int64_t blocks)
{
return xfs_trans_reserve_quota_nblks(NULL, ip, blocks, 0, false);
}
static inline void
xfs_quota_unreserve_blkres(struct xfs_inode *ip, uint64_t blocks)
{


@@ -1265,7 +1265,9 @@ xfs_growfs_rt(
/* Unsupported realtime features. */
error = -EOPNOTSUPP;
if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp) || xfs_has_quota(mp))
if (xfs_has_quota(mp) && !xfs_has_rtgroups(mp))
goto out_unlock;
if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp))
goto out_unlock;
error = xfs_sb_validate_fsb_count(&mp->m_sb, in->newblocks);


@@ -115,10 +115,11 @@ void xfs_stats_clearall(struct xfsstats __percpu *stats)
static int xqm_proc_show(struct seq_file *m, void *v)
{
/* maximum; incore; ratio free to inuse; freelist */
seq_printf(m, "%d\t%d\t%d\t%u\n",
/* maximum; incore; ratio free to inuse; freelist; rtquota */
seq_printf(m, "%d\t%d\t%d\t%u\t%s\n",
0, counter_val(xfsstats.xs_stats, XFSSTAT_END_XQMSTAT),
0, counter_val(xfsstats.xs_stats, XFSSTAT_END_XQMSTAT + 1));
0, counter_val(xfsstats.xs_stats, XFSSTAT_END_XQMSTAT + 1),
IS_ENABLED(CONFIG_XFS_RT) ? "rtquota" : "quota");
return 0;
}


@@ -877,12 +877,6 @@ xfs_fs_statfs(
ffree = statp->f_files - (icount - ifree);
statp->f_ffree = max_t(int64_t, ffree, 0);
if ((ip->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
(XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
xfs_qm_statvfs(ip, statp);
if (XFS_IS_REALTIME_MOUNT(mp) &&
(ip->i_diflags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) {
s64 freertx;
@@ -893,6 +887,11 @@ xfs_fs_statfs(
xfs_rtbxlen_to_blen(mp, freertx);
}
if ((ip->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
(XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
xfs_qm_statvfs(ip, statp);
return 0;
}


@@ -1288,11 +1288,26 @@ retry:
gdqp = (new_gdqp != ip->i_gdquot) ? new_gdqp : NULL;
pdqp = (new_pdqp != ip->i_pdquot) ? new_pdqp : NULL;
if (udqp || gdqp || pdqp) {
xfs_filblks_t dblocks, rblocks;
unsigned int qflags = XFS_QMOPT_RES_REGBLKS;
bool isrt = XFS_IS_REALTIME_INODE(ip);
if (force)
qflags |= XFS_QMOPT_FORCE_RES;
if (isrt) {
error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
if (error)
goto out_cancel;
}
xfs_inode_count_blocks(tp, ip, &dblocks, &rblocks);
if (isrt)
rblocks += ip->i_delayed_blks;
else
dblocks += ip->i_delayed_blks;
/*
* Reserve enough quota to handle blocks on disk and reserved
* for a delayed allocation. We'll actually transfer the
@@ -1300,8 +1315,20 @@ retry:
* though that part is only semi-transactional.
*/
error = xfs_trans_reserve_quota_bydquots(tp, mp, udqp, gdqp,
pdqp, ip->i_nblocks + ip->i_delayed_blks,
1, qflags);
pdqp, dblocks, 1, qflags);
if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
xfs_trans_cancel(tp);
xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, 0);
retried = true;
goto retry;
}
if (error)
goto out_cancel;
/* Do the same for realtime. */
qflags = XFS_QMOPT_RES_RTBLKS | (qflags & XFS_QMOPT_FORCE_RES);
error = xfs_trans_reserve_quota_bydquots(tp, mp, udqp, gdqp,
pdqp, rblocks, 0, qflags);
if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
xfs_trans_cancel(tp);
xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, 0);


@@ -1031,3 +1031,14 @@ xfs_trans_free_dqinfo(
kmem_cache_free(xfs_dqtrx_cache, tp->t_dqinfo);
tp->t_dqinfo = NULL;
}
int
xfs_quota_reserve_blkres(
struct xfs_inode *ip,
int64_t blocks)
{
if (XFS_IS_REALTIME_INODE(ip))
return xfs_trans_reserve_quota_nblks(NULL, ip, 0, blocks,
false);
return xfs_trans_reserve_quota_nblks(NULL, ip, blocks, 0, false);
}