gfs2 fixes

 * To avoid deadlocks, actively cancel dlm locking requests when we give
   up on them.  Further dlm operations on the same lock will return
   -EBUSY until the cancel has been completed, so in that case, wait and
   repeat.  (This is rare.)
 * Lock inversion fixes in gfs2_inode_lookup() and gfs2_create_inode().
 * Some more fallout from the gfs2 mmap + page fault deadlock fixes
   (merged in commit c03098d4b9, "Merge tag 'gfs2-v5.15-rc5-mmap-fault'").
 * Various other minor bug fixes and cleanups.
 -----BEGIN PGP SIGNATURE-----
 
 iQJIBAABCAAyFiEEJZs3krPW0xkhLMTc1b+f6wMTZToFAmJGCAsUHGFncnVlbmJh
 QHJlZGhhdC5jb20ACgkQ1b+f6wMTZTrWcg//TEDazop2y7rGMFsMBXI7HPyBu4uD
 BwoclS5IfjoQbBTtkl7cWmQViMk8s3EFGxdEBorfGmMEq65I/krHi4JXG2GETdui
 ORoi8NH1sW9H2GJXmwtE2wYZlJBZtdntoBGdPXWFvt1hLajf6WGpy/CR1Wd4rYak
 8AHQxtd98OtsA6LAPlWl2UaXS4m7rhEt0Iy83mqWtbBOvZsULczuraazawnoQ/m4
 Wf5pvb+73hpwTVUkruH0+If+vi/HF0WVv1nZVyMwrSh3mpvkrsZSkbN0fd0veAhD
 b5XGI1dD5+YPxAOdwDKqnqy8/E3gRekybmpcd48BXoxF4EX/AlLX/Zn9qnrAhY6M
 qEbGzC2UqLIrPe/KjzQ8+0aKPCY5FB1VqoRMAHC/bj7mlmNgGtHxQUXdDmC4LIi6
 GOLpnueI1KtA7Hb4HCgX0BLxSqUEhUuGssBkNIqGet1cRwmM33pt1J4CG4TDLBt/
 VZiERnN3qktSlmukvd3oLSZso4fVbg7PyFTl8YMgiLDNfgcZI9RY5qwIJYrOaucr
 KTNfR6lAL2slFPIVcLwmgJt+axogk6GnCkfDVMX2VLJnMQYqJnDYn6fVG9jngSB+
 F4UBZ/alzhpel08r8xtxjADFJzA+weG1I2jnikSLKlgVN+uiQTBrhqyWdtxtEqFM
 31Nd7piiSVQEvrM=
 =xMYz
 -----END PGP SIGNATURE-----

Merge tag 'gfs2-v5.17-rc4-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2

Pull gfs2 fixes from Andreas Gruenbacher:

 - To avoid deadlocks, actively cancel dlm locking requests when we give
   up on them.

   Further dlm operations on the same lock will return -EBUSY until the
   cancel has been completed, so in that case, wait and repeat. (This is
   rare.)

 - Lock inversion fixes in gfs2_inode_lookup() and gfs2_create_inode().

 - Some more fallout from the gfs2 mmap + page fault deadlock fixes
   (merged in commit c03098d4b9: "Merge tag 'gfs2-v5.15-rc5-mmap-fault'").

 - Various other minor bug fixes and cleanups.

* tag 'gfs2-v5.17-rc4-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2:
  gfs2: Make sure FITRIM minlen is rounded up to fs block size
  gfs2: Make sure not to return short direct writes
  gfs2: Remove dead code in gfs2_file_read_iter
  gfs2: Fix gfs2_file_buffered_write endless loop workaround
  gfs2: Minor retry logic cleanup
  gfs2: Disable page faults during lockless buffered reads
  gfs2: Fix should_fault_in_pages() logic
  gfs2: Remove return value for gfs2_indirect_init
  gfs2: Initialize gh_error in gfs2_glock_nq
  gfs2: Make use of list_is_first
  gfs2: Switch lock order of inode and iopen glock
  gfs2: cancel timed-out glock requests
  gfs2: Expect -EBUSY after canceling dlm locking requests
  gfs2: gfs2_setattr_size error path fix
  gfs2: assign rgrp glock before compute_bitstructs
This commit is contained in:
Linus Torvalds 2022-03-31 15:57:50 -07:00
commit 3d198e42ce
8 changed files with 104 additions and 76 deletions

View File

@ -606,7 +606,7 @@ out:
return ret;
}
static inline __be64 *gfs2_indirect_init(struct metapath *mp,
static inline void gfs2_indirect_init(struct metapath *mp,
struct gfs2_glock *gl, unsigned int i,
unsigned offset, u64 bn)
{
@ -621,7 +621,6 @@ static inline __be64 *gfs2_indirect_init(struct metapath *mp,
gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
ptr += offset;
*ptr = cpu_to_be64(bn);
return ptr;
}
enum alloc_state {
@ -2146,7 +2145,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
ret = do_shrink(inode, newsize);
out:
gfs2_rs_delete(ip, NULL);
gfs2_rs_delete(ip);
gfs2_qa_put(ip);
return ret;
}

View File

@ -706,7 +706,7 @@ static int gfs2_release(struct inode *inode, struct file *file)
if (file->f_mode & FMODE_WRITE) {
if (gfs2_rs_active(&ip->i_res))
gfs2_rs_delete(ip, &inode->i_writecount);
gfs2_rs_delete(ip);
gfs2_qa_put(ip);
}
return 0;
@ -775,8 +775,7 @@ static inline bool should_fault_in_pages(ssize_t ret, struct iov_iter *i,
size_t *window_size)
{
size_t count = iov_iter_count(i);
char __user *p;
int pages = 1;
size_t size, offs;
if (likely(!count))
return false;
@ -785,18 +784,20 @@ static inline bool should_fault_in_pages(ssize_t ret, struct iov_iter *i,
if (!iter_is_iovec(i))
return false;
size = PAGE_SIZE;
offs = offset_in_page(i->iov[0].iov_base + i->iov_offset);
if (*prev_count != count || !*window_size) {
int pages, nr_dirtied;
size_t nr_dirtied;
pages = min_t(int, BIO_MAX_VECS, DIV_ROUND_UP(count, PAGE_SIZE));
size = ALIGN(offs + count, PAGE_SIZE);
size = min_t(size_t, size, SZ_1M);
nr_dirtied = max(current->nr_dirtied_pause -
current->nr_dirtied, 1);
pages = min(pages, nr_dirtied);
current->nr_dirtied, 8);
size = min(size, nr_dirtied << PAGE_SHIFT);
}
*prev_count = count;
p = i->iov[0].iov_base + i->iov_offset;
*window_size = (size_t)PAGE_SIZE * pages - offset_in_page(p);
*window_size = size - offs;
return true;
}
@ -851,9 +852,9 @@ retry_under_glock:
leftover = fault_in_iov_iter_writeable(to, window_size);
gfs2_holder_disallow_demote(gh);
if (leftover != window_size) {
if (!gfs2_holder_queued(gh))
goto retry;
if (gfs2_holder_queued(gh))
goto retry_under_glock;
goto retry;
}
}
if (gfs2_holder_queued(gh))
@ -920,9 +921,9 @@ retry_under_glock:
leftover = fault_in_iov_iter_readable(from, window_size);
gfs2_holder_disallow_demote(gh);
if (leftover != window_size) {
if (!gfs2_holder_queued(gh))
goto retry;
if (gfs2_holder_queued(gh))
goto retry_under_glock;
goto retry;
}
}
out:
@ -950,20 +951,19 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
* and retry.
*/
if (iocb->ki_flags & IOCB_DIRECT) {
ret = gfs2_file_direct_read(iocb, to, &gh);
if (likely(ret != -ENOTBLK))
return ret;
iocb->ki_flags &= ~IOCB_DIRECT;
}
if (iocb->ki_flags & IOCB_DIRECT)
return gfs2_file_direct_read(iocb, to, &gh);
pagefault_disable();
iocb->ki_flags |= IOCB_NOIO;
ret = generic_file_read_iter(iocb, to);
iocb->ki_flags &= ~IOCB_NOIO;
pagefault_enable();
if (ret >= 0) {
if (!iov_iter_count(to))
return ret;
written = ret;
} else {
} else if (ret != -EFAULT) {
if (ret != -EAGAIN)
return ret;
if (iocb->ki_flags & IOCB_NOWAIT)
@ -989,13 +989,12 @@ retry_under_glock:
leftover = fault_in_iov_iter_writeable(to, window_size);
gfs2_holder_disallow_demote(&gh);
if (leftover != window_size) {
if (!gfs2_holder_queued(&gh)) {
if (gfs2_holder_queued(&gh))
goto retry_under_glock;
if (written)
goto out_uninit;
goto retry;
}
goto retry_under_glock;
}
}
if (gfs2_holder_queued(&gh))
gfs2_glock_dq(&gh);
@ -1068,13 +1067,12 @@ retry_under_glock:
gfs2_holder_disallow_demote(gh);
if (leftover != window_size) {
from->count = min(from->count, window_size - leftover);
if (!gfs2_holder_queued(gh)) {
if (read)
if (gfs2_holder_queued(gh))
goto retry_under_glock;
if (read && !(iocb->ki_flags & IOCB_DIRECT))
goto out_uninit;
goto retry;
}
goto retry_under_glock;
}
}
out_unlock:
if (gfs2_holder_queued(gh))
@ -1083,6 +1081,7 @@ out_uninit:
gfs2_holder_uninit(gh);
if (statfs_gh)
kfree(statfs_gh);
from->count = orig_count - read;
return read ? read : ret;
}
@ -1497,7 +1496,6 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
if (error != GLR_TRYFAILED)
break;
fl_gh->gh_flags = LM_FLAG_TRY | GL_EXACT;
fl_gh->gh_error = 0;
msleep(sleeptime);
}
if (error) {

View File

@ -542,7 +542,7 @@ restart:
* some reason. If this holder is the head of the list, it
* means we have a blocked holder at the head, so return 1.
*/
if (gh->gh_list.prev == &gl->gl_holders)
if (list_is_first(&gh->gh_list, &gl->gl_holders))
return 1;
do_error(gl, 0);
break;
@ -669,6 +669,8 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
/* Check for state != intended state */
if (unlikely(state != gl->gl_target)) {
if (gh && (ret & LM_OUT_CANCELED))
gfs2_holder_wake(gh);
if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) {
/* move to back of queue and try next entry */
if (ret & LM_OUT_CANCELED) {
@ -1259,7 +1261,6 @@ void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags,
gh->gh_owner_pid = get_pid(task_pid(current));
gh->gh_state = state;
gh->gh_flags = flags;
gh->gh_error = 0;
gh->gh_iflags = 0;
gfs2_glock_hold(gl);
}
@ -1565,6 +1566,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh)
if (test_bit(GLF_LRU, &gl->gl_flags))
gfs2_glock_remove_from_lru(gl);
gh->gh_error = 0;
spin_lock(&gl->gl_lockref.lock);
add_to_queue(gh);
if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) &&
@ -1691,6 +1693,14 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
struct gfs2_glock *gl = gh->gh_gl;
spin_lock(&gl->gl_lockref.lock);
if (list_is_first(&gh->gh_list, &gl->gl_holders) &&
!test_bit(HIF_HOLDER, &gh->gh_iflags)) {
spin_unlock(&gl->gl_lockref.lock);
gl->gl_name.ln_sbd->sd_lockstruct.ls_ops->lm_cancel(gl);
wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE);
spin_lock(&gl->gl_lockref.lock);
}
__gfs2_glock_dq(gh);
spin_unlock(&gl->gl_lockref.lock);
}

View File

@ -131,7 +131,21 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_glock *io_gl;
error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE,
&ip->i_gl);
if (unlikely(error))
goto fail;
error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE,
&io_gl);
if (unlikely(error))
goto fail;
if (blktype != GFS2_BLKST_UNLINKED)
gfs2_cancel_delete_work(io_gl);
error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT,
&ip->i_iopen_gh);
gfs2_glock_put(io_gl);
if (unlikely(error))
goto fail;
@ -161,16 +175,6 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
set_bit(GLF_INSTANTIATE_NEEDED, &ip->i_gl->gl_flags);
error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
if (unlikely(error))
goto fail;
if (blktype != GFS2_BLKST_UNLINKED)
gfs2_cancel_delete_work(io_gl);
error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
gfs2_glock_put(io_gl);
if (unlikely(error))
goto fail;
/* Lowest possible timestamp; will be overwritten in gfs2_dinode_in. */
inode->i_atime.tv_sec = 1LL << (8 * sizeof(inode->i_atime.tv_sec) - 1);
inode->i_atime.tv_nsec = 0;
@ -716,13 +720,17 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
error = insert_inode_locked4(inode, ip->i_no_addr, iget_test, &ip->i_no_addr);
BUG_ON(error);
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
if (error)
goto fail_gunlock2;
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
if (error)
goto fail_gunlock3;
error = gfs2_trans_begin(sdp, blocks, 0);
if (error)
goto fail_gunlock2;
goto fail_gunlock3;
if (blocks > 1) {
ip->i_eattr = ip->i_no_addr + 1;
@ -731,10 +739,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
init_dinode(dip, ip, symname);
gfs2_trans_end(sdp);
error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
if (error)
goto fail_gunlock2;
glock_set_object(ip->i_gl, ip);
glock_set_object(io_gl, ip);
gfs2_set_iop(inode);
@ -745,14 +749,14 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (default_acl) {
error = __gfs2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
if (error)
goto fail_gunlock3;
goto fail_gunlock4;
posix_acl_release(default_acl);
default_acl = NULL;
}
if (acl) {
error = __gfs2_set_acl(inode, acl, ACL_TYPE_ACCESS);
if (error)
goto fail_gunlock3;
goto fail_gunlock4;
posix_acl_release(acl);
acl = NULL;
}
@ -760,11 +764,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
error = security_inode_init_security(&ip->i_inode, &dip->i_inode, name,
&gfs2_initxattrs, NULL);
if (error)
goto fail_gunlock3;
goto fail_gunlock4;
error = link_dinode(dip, name, ip, &da);
if (error)
goto fail_gunlock3;
goto fail_gunlock4;
mark_inode_dirty(inode);
d_instantiate(dentry, inode);
@ -782,9 +786,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
unlock_new_inode(inode);
return error;
fail_gunlock3:
fail_gunlock4:
glock_clear_object(ip->i_gl, ip);
glock_clear_object(io_gl, ip);
fail_gunlock3:
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
fail_gunlock2:
gfs2_glock_put(io_gl);
@ -793,7 +798,7 @@ fail_free_inode:
if (free_vfs_inode) /* else evict will do the put for us */
gfs2_glock_put(ip->i_gl);
}
gfs2_rs_delete(ip, NULL);
gfs2_rs_deltree(&ip->i_res);
gfs2_qa_put(ip);
fail_free_acls:
posix_acl_release(default_acl);

View File

@ -261,6 +261,7 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
int req;
u32 lkf;
char strname[GDLM_STRNAME_BYTES] = "";
int error;
req = make_mode(gl->gl_name.ln_sbd, req_state);
lkf = make_flags(gl, flags, req);
@ -279,8 +280,14 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
* Submit the actual lock request.
*/
return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname,
again:
error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname,
GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
if (error == -EBUSY) {
msleep(20);
goto again;
}
return error;
}
static void gdlm_put_lock(struct gfs2_glock *gl)
@ -312,8 +319,14 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
return;
}
again:
error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK,
NULL, gl);
if (error == -EBUSY) {
msleep(20);
goto again;
}
if (error) {
fs_err(sdp, "gdlm_unlock %x,%llx err=%d\n",
gl->gl_name.ln_type,

View File

@ -680,13 +680,14 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
/**
* gfs2_rs_delete - delete a multi-block reservation
* @ip: The inode for this reservation
* @wcount: The inode's write count, or NULL
*
*/
void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount)
void gfs2_rs_delete(struct gfs2_inode *ip)
{
struct inode *inode = &ip->i_inode;
down_write(&ip->i_rw_mutex);
if ((wcount == NULL) || (atomic_read(wcount) <= 1))
if (atomic_read(&inode->i_writecount) <= 1)
gfs2_rs_deltree(&ip->i_res);
up_write(&ip->i_rw_mutex);
}
@ -922,15 +923,15 @@ static int read_rindex_entry(struct gfs2_inode *ip)
spin_lock_init(&rgd->rd_rsspin);
mutex_init(&rgd->rd_mutex);
error = compute_bitstructs(rgd);
if (error)
goto fail;
error = gfs2_glock_get(sdp, rgd->rd_addr,
&gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
if (error)
goto fail;
error = compute_bitstructs(rgd);
if (error)
goto fail_glock;
rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
rgd->rd_flags &= ~GFS2_RDF_PREFERRED;
if (rgd->rd_data > sdp->sd_max_rg_data)
@ -944,6 +945,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
}
error = 0; /* someone else read in the rgrp; free it and ignore it */
fail_glock:
gfs2_glock_put(rgd->rd_gl);
fail:
@ -1415,7 +1417,8 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
start = r.start >> bs_shift;
end = start + (r.len >> bs_shift);
minlen = max_t(u64, r.minlen,
minlen = max_t(u64, r.minlen, sdp->sd_sb.sb_bsize);
minlen = max_t(u64, minlen,
q->limits.discard_granularity) >> bs_shift;
if (end <= start || minlen > sdp->sd_max_rg_data)

View File

@ -45,7 +45,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
bool dinode, u64 *generation);
extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs);
extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount);
extern void gfs2_rs_delete(struct gfs2_inode *ip);
extern void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
u64 bstart, u32 blen, int meta);
extern void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,

View File

@ -1396,7 +1396,7 @@ out:
truncate_inode_pages_final(&inode->i_data);
if (ip->i_qadata)
gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0);
gfs2_rs_delete(ip, NULL);
gfs2_rs_deltree(&ip->i_res);
gfs2_ordered_del_inode(ip);
clear_inode(inode);
gfs2_dir_hash_inval(ip);