From 30116ff6c6d140bc696cc624e6d8e38f018c886e Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 14 Jun 2010 09:58:41 +0100 Subject: [PATCH 1/8] GFS2: Use nobh_writepage Use nobh_writepage rather than calling mpage_writepage directly. Signed-off-by: Steven Whitehouse Cc: Christoph Hellwig --- fs/gfs2/aops.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 9f8b52500d63..9485a882d80b 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -136,10 +136,7 @@ static int gfs2_writeback_writepage(struct page *page, if (ret <= 0) return ret; - ret = mpage_writepage(page, gfs2_get_block_noalloc, wbc); - if (ret == -EAGAIN) - ret = block_write_full_page(page, gfs2_get_block_noalloc, wbc); - return ret; + return nobh_writepage(page, gfs2_get_block_noalloc, wbc); } /** From ba6e93645f039bd357e04b7b9d18f4e67757725e Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 14 Jun 2010 10:01:30 +0100 Subject: [PATCH 2/8] GFS2: Wait for journal id on mount if not specified on mount command line This patch implements a wait for the journal id in the case that it has not been specified on the command line. This is to allow the future removal of the mount.gfs2 helper. The journal id would instead be directly communicated by gfs_controld to the file system. Here is a comparison of the two systems: Current: 1. mount calls mount.gfs2 2. mount.gfs2 connects to gfs_controld to retrieve the journal id 3. mount.gfs2 adds the journal id to the mount command line and calls the mount system call 4. gfs_controld receives the status of the mount request via a uevent Proposed: 1. mount calls the mount system call (no mount.gfs2 helper) 2. gfs_controld receives a uevent for a gfs2 fs which it doesn't know about already 3. gfs_controld assigns a journal id to it via sysfs 4. the mount system call then completes as normal (sending a uevent according to status) The advantage of the proposed system is that it is completely backward compatible with the current system both at the kernel and at the userland levels. The "first" parameter can also be set the same way, with the restriction that it must be set before the journal id is assigned. In addition, if mount becomes stuck waiting for a reply from gfs_controld which never arrives, then it is killable and will abort the mount gracefully. Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 1 + fs/gfs2/ops_fstype.c | 27 +++++++++++++++++++-- fs/gfs2/sys.c | 57 +++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 80 insertions(+), 5 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index b5d7363b22da..8fcbce48a128 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -460,6 +460,7 @@ enum { SDF_NOBARRIERS = 3, SDF_NORECOVERY = 4, SDF_DEMOTE = 5, + SDF_NOJOURNALID = 6, }; #define GFS2_FSNAME_LEN 256 diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 3593b3a7290e..45a4a36195d8 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -76,7 +76,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) sb->s_fs_info = sdp; sdp->sd_vfs = sb; - + set_bit(SDF_NOJOURNALID, &sdp->sd_flags); gfs2_tune_init(&sdp->sd_tune); init_waitqueue_head(&sdp->sd_glock_wait); @@ -1050,7 +1050,8 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) ret = match_int(&tmp[0], &option); if (ret || option < 0) goto hostdata_error; - ls->ls_jid = option; + if (test_and_clear_bit(SDF_NOJOURNALID, &sdp->sd_flags)) + ls->ls_jid = option; break; case Opt_id: /* Obsolete, but left for backward compat purposes */ @@ -1102,6 +1103,24 @@ void gfs2_lm_unmount(struct gfs2_sbd *sdp) lm->lm_unmount(sdp); } +static int gfs2_journalid_wait(void *word) +{ + if (signal_pending(current)) + return -EINTR; + schedule(); + return 0; +} + +static int wait_on_journal(struct gfs2_sbd *sdp) +{ + if (sdp->sd_args.ar_spectator) + return 0; + if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) + return 0; + + return wait_on_bit(&sdp->sd_flags, SDF_NOJOURNALID, gfs2_journalid_wait, TASK_INTERRUPTIBLE); +} + void gfs2_online_uevent(struct gfs2_sbd *sdp) { struct super_block *sb = sdp->sd_vfs; @@ -1194,6 +1213,10 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent if (error) goto fail_locking; + error = wait_on_journal(sdp); + if (error) + goto fail_sb; + error = init_inodes(sdp, DO); if (error) goto fail_sb; diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 37f5393e68e6..d019d0d55e00 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -325,6 +325,30 @@ static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf) return sprintf(buf, "%d\n", ls->ls_first); } +static ssize_t lkfirst_store(struct gfs2_sbd *sdp, const char *buf, size_t len) +{ + unsigned first; + int rv; + + rv = sscanf(buf, "%u", &first); + if (rv != 1 || first > 1) + return -EINVAL; + spin_lock(&sdp->sd_jindex_spin); + rv = -EBUSY; + if (test_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0) + goto out; + rv = -EINVAL; + if (sdp->sd_args.ar_spectator) + goto out; + if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) + goto out; + sdp->sd_lockstruct.ls_first = first; + rv = 0; +out: + spin_unlock(&sdp->sd_jindex_spin); + return rv ? rv : len; +} + static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; @@ -377,14 +401,41 @@ static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf) return sprintf(buf, "%u\n", sdp->sd_lockstruct.ls_jid); } +static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len) +{ + unsigned jid; + int rv; + + rv = sscanf(buf, "%u", &jid); + if (rv != 1) + return -EINVAL; + + spin_lock(&sdp->sd_jindex_spin); + rv = -EINVAL; + if (sdp->sd_args.ar_spectator) + goto out; + if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) + goto out; + rv = -EBUSY; + if (test_and_clear_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0) + goto out; + sdp->sd_lockstruct.ls_jid = jid; + smp_mb__after_clear_bit(); + wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID); + rv = 0; +out: + spin_unlock(&sdp->sd_jindex_spin); + return rv ? rv : len; +} + #define GDLM_ATTR(_name,_mode,_show,_store) \ static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); GDLM_ATTR(block, 0644, block_show, block_store); GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); -GDLM_ATTR(jid, 0444, jid_show, NULL); -GDLM_ATTR(first, 0444, lkfirst_show, NULL); +GDLM_ATTR(jid, 0644, jid_show, jid_store); +GDLM_ATTR(first, 0644, lkfirst_show, lkfirst_store); GDLM_ATTR(first_done, 0444, first_done_show, NULL); GDLM_ATTR(recover, 0600, NULL, recover_store); GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); @@ -564,7 +615,7 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj, add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); - if (!sdp->sd_args.ar_spectator) + if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags)) add_uevent_var(env, "JOURNALID=%u", sdp->sd_lockstruct.ls_jid); if (gfs2_uuid_valid(uuid)) add_uevent_var(env, "UUID=%pUB", uuid); From 461cb419f074aab16836a660efb8e855b6c1609c Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 24 Jun 2010 19:21:20 -0400 Subject: [PATCH 3/8] GFS2: Simplify gfs2_write_alloc_required Function gfs2_write_alloc_required always returned zero as its return code. Therefore, it doesn't need to return a return code at all. Given that, we can use the return value to return whether or not the dinode needs block allocations rather than passing that value in, which in turn simplifies a bunch of error checking. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/aops.c | 4 +--- fs/gfs2/bmap.c | 15 +++++---------- fs/gfs2/bmap.h | 2 +- fs/gfs2/file.c | 4 +--- fs/gfs2/quota.c | 15 +++------------ fs/gfs2/super.c | 9 +++------ 6 files changed, 14 insertions(+), 35 deletions(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 9485a882d80b..5e96cbd8a454 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -634,9 +634,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, } } - error = gfs2_write_alloc_required(ip, pos, len, &alloc_required); - if (error) - goto out_unlock; + alloc_required = gfs2_write_alloc_required(ip, pos, len); if (alloc_required || gfs2_is_jdata(ip)) gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 84da64b551b2..744c29e2dcf4 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1244,13 +1244,12 @@ int gfs2_file_dealloc(struct gfs2_inode *ip) * @ip: the file being written to * @offset: the offset to write to * @len: the number of bytes being written - * @alloc_required: set to 1 if an alloc is required, 0 otherwise * - * Returns: errno + * Returns: 1 if an alloc is required, 0 otherwise */ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, - unsigned int len, int *alloc_required) + unsigned int len) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head bh; @@ -1258,26 +1257,23 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, u64 lblock, lblock_stop, size; u64 end_of_file; - *alloc_required = 0; - if (!len) return 0; if (gfs2_is_stuffed(ip)) { if (offset + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) - *alloc_required = 1; + return 1; return 0; } - *alloc_required = 1; shift = sdp->sd_sb.sb_bsize_shift; BUG_ON(gfs2_is_dir(ip)); end_of_file = (ip->i_disksize + sdp->sd_sb.sb_bsize - 1) >> shift; lblock = offset >> shift; lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; if (lblock_stop > end_of_file) - return 0; + return 1; size = (lblock_stop - lblock) << shift; do { @@ -1285,12 +1281,11 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, bh.b_size = size; gfs2_block_map(&ip->i_inode, lblock, &bh, 0); if (!buffer_mapped(&bh)) - return 0; + return 1; size -= bh.b_size; lblock += (bh.b_size >> ip->i_inode.i_blkbits); } while(size > 0); - *alloc_required = 0; return 0; } diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h index c983177e05ac..a20a5213135a 100644 --- a/fs/gfs2/bmap.h +++ b/fs/gfs2/bmap.h @@ -52,6 +52,6 @@ int gfs2_truncatei(struct gfs2_inode *ip, u64 size); int gfs2_truncatei_resume(struct gfs2_inode *ip); int gfs2_file_dealloc(struct gfs2_inode *ip); int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, - unsigned int len, int *alloc_required); + unsigned int len); #endif /* __BMAP_DOT_H__ */ diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index ed9a94f0ef15..4edd662c8232 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -351,7 +351,6 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) unsigned long last_index; u64 pos = page->index << PAGE_CACHE_SHIFT; unsigned int data_blocks, ind_blocks, rblocks; - int alloc_required = 0; struct gfs2_holder gh; struct gfs2_alloc *al; int ret; @@ -364,8 +363,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); set_bit(GIF_SW_PAGED, &ip->i_flags); - ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required); - if (ret || !alloc_required) + if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE)) goto out_unlock; ret = -ENOMEM; al = gfs2_alloc_get(ip); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 8f02d3db8f42..8bb643cb2658 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -787,15 +787,9 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) goto out; for (x = 0; x < num_qd; x++) { - int alloc_required; - offset = qd2offset(qda[x]); - error = gfs2_write_alloc_required(ip, offset, - sizeof(struct gfs2_quota), - &alloc_required); - if (error) - goto out_gunlock; - if (alloc_required) + if (gfs2_write_alloc_required(ip, offset, + sizeof(struct gfs2_quota))) nalloc++; } @@ -1584,10 +1578,7 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, goto out_i; offset = qd2offset(qd); - error = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota), - &alloc_required); - if (error) - goto out_i; + alloc_required = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota)); if (alloc_required) { al = gfs2_alloc_get(ip); if (al == NULL) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 4d1aad38f1b1..4140811a921c 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -342,8 +342,6 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd) { struct gfs2_inode *ip = GFS2_I(jd->jd_inode); struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); - int ar; - int error; if (ip->i_disksize < (8 << 20) || ip->i_disksize > (1 << 30) || (ip->i_disksize & (sdp->sd_sb.sb_bsize - 1))) { @@ -352,13 +350,12 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd) } jd->jd_blocks = ip->i_disksize >> sdp->sd_sb.sb_bsize_shift; - error = gfs2_write_alloc_required(ip, 0, ip->i_disksize, &ar); - if (!error && ar) { + if (gfs2_write_alloc_required(ip, 0, ip->i_disksize)) { gfs2_consist_inode(ip); - error = -EIO; + return -EIO; } - return error; + return 0; } /** From 4244b52e18be959ced77b984f268e46a0a7654e3 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 20 Jul 2010 19:45:03 -0700 Subject: [PATCH 4/8] GFS2: remove dependency on __GFP_NOFAIL The k[mc]allocs in dr_split_leaf() and dir_double_exhash() are failable, so remove __GFP_NOFAIL from their masks. Cc: Bob Peterson Signed-off-by: David Rientjes Signed-off-by: Steven Whitehouse --- fs/gfs2/dir.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 6b48d7c268b2..b9dd88a78dd4 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -955,7 +955,12 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) /* Change the pointers. Don't bother distinguishing stuffed from non-stuffed. This code is complicated enough already. */ - lp = kmalloc(half_len * sizeof(__be64), GFP_NOFS | __GFP_NOFAIL); + lp = kmalloc(half_len * sizeof(__be64), GFP_NOFS); + if (!lp) { + error = -ENOMEM; + goto fail_brelse; + } + /* Change the pointers */ for (x = 0; x < half_len; x++) lp[x] = cpu_to_be64(bn); @@ -1063,7 +1068,9 @@ static int dir_double_exhash(struct gfs2_inode *dip) /* Allocate both the "from" and "to" buffers in one big chunk */ - buf = kcalloc(3, sdp->sd_hash_bsize, GFP_NOFS | __GFP_NOFAIL); + buf = kcalloc(3, sdp->sd_hash_bsize, GFP_NOFS); + if (!buf) + return -ENOMEM; for (block = dip->i_disksize >> sdp->sd_hash_bsize_shift; block--;) { error = gfs2_dir_read_data(dip, (char *)buf, From d5341a92416706808dc5cd847826f28c08063c8c Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 23 Jul 2010 14:05:51 +0100 Subject: [PATCH 5/8] GFS2: Make "try" lock not try quite so hard This looks like a big change, but in reality its only a single line of actual code change, the rest is just moving a function to before its new caller. The "try" flag for glocks is a rather subtle and delicate setting since it requires that the state machine tries just hard enough to ensure that it has a good chance of getting the requested lock, but no so hard that the request can land up blocked behind another. The patch adds in an additional check which will fail any queued try locks if there is another request blocking the try lock request which is not granted and compatible, nor in progress already. The check is made only after all pending locks which may be granted have been granted. I've checked this with the reproducer for the reported flock bug which this is intended to fix, and it now passes. Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 49 +++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 0898f3ec8212..717531d1b2a8 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -327,6 +327,30 @@ static void gfs2_holder_wake(struct gfs2_holder *gh) wake_up_bit(&gh->gh_iflags, HIF_WAIT); } +/** + * do_error - Something unexpected has happened during a lock request + * + */ + +static inline void do_error(struct gfs2_glock *gl, const int ret) +{ + struct gfs2_holder *gh, *tmp; + + list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { + if (test_bit(HIF_HOLDER, &gh->gh_iflags)) + continue; + if (ret & LM_OUT_ERROR) + gh->gh_error = -EIO; + else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) + gh->gh_error = GLR_TRYFAILED; + else + continue; + list_del_init(&gh->gh_list); + trace_gfs2_glock_queue(gh, 0); + gfs2_holder_wake(gh); + } +} + /** * do_promote - promote as many requests as possible on the current queue * @gl: The glock @@ -375,35 +399,12 @@ restart: } if (gh->gh_list.prev == &gl->gl_holders) return 1; + do_error(gl, 0); break; } return 0; } -/** - * do_error - Something unexpected has happened during a lock request - * - */ - -static inline void do_error(struct gfs2_glock *gl, const int ret) -{ - struct gfs2_holder *gh, *tmp; - - list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { - if (test_bit(HIF_HOLDER, &gh->gh_iflags)) - continue; - if (ret & LM_OUT_ERROR) - gh->gh_error = -EIO; - else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) - gh->gh_error = GLR_TRYFAILED; - else - continue; - list_del_init(&gh->gh_list); - trace_gfs2_glock_queue(gh, 0); - gfs2_holder_wake(gh); - } -} - /** * find_first_waiter - find the first gh that's waiting for the glock * @gl: the glock From 7cdee5dbf477409e4afc6c9063492dc2577b41ea Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 29 Jul 2010 14:39:29 +0100 Subject: [PATCH 6/8] Revert "GFS2: recovery stuck on transaction lock" This reverts commit b7dc2df5725fe7355fd76000ead7e39728e1b8a9. The initial patch didn't quite work since it doesn't cover all the possible routes by which the GLF_FROZEN flag might be set. A revised fix is coming up in the next patch. Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 717531d1b2a8..2b3d8f8a8393 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -707,18 +707,8 @@ static void glock_work_func(struct work_struct *work) { unsigned long delay = 0; struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); - struct gfs2_holder *gh; int drop_ref = 0; - if (unlikely(test_bit(GLF_FROZEN, &gl->gl_flags))) { - spin_lock(&gl->gl_spin); - gh = find_first_waiter(gl); - if (gh && (gh->gh_flags & LM_FLAG_NOEXP) && - test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) - set_bit(GLF_REPLY_PENDING, &gl->gl_flags); - spin_unlock(&gl->gl_spin); - } - if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) { finish_xmote(gl, gl->gl_reply); drop_ref = 1; From c639d5d8f69f37e24ed0354373f61fcbde4b9354 Mon Sep 17 00:00:00 2001 From: Abhijith Das Date: Fri, 30 Jul 2010 11:34:52 -0400 Subject: [PATCH 7/8] GFS2: Fix typo in stuffed file data copy handling trunc_start() in bmap.c incorrectly uses sizeof(struct gfs2_inode) instead of sizeof(struct gfs2_dinode). Signed-off-by: Abhi Das Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 744c29e2dcf4..6f482809d1a3 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1040,7 +1040,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size) goto out; if (gfs2_is_stuffed(ip)) { - u64 dsize = size + sizeof(struct gfs2_inode); + u64 dsize = size + sizeof(struct gfs2_dinode); ip->i_disksize = size; ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; gfs2_trans_add_bh(ip->i_gl, dibh, 1); From 0809f6ec18bbce54c996f5c36f4b9d371075c98b Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 2 Aug 2010 10:15:17 +0100 Subject: [PATCH 8/8] GFS2: Fix recovery stuck bug (try #2) This is a clean up of the code which deals with LM_FLAG_NOEXP which aims to remove any possible race conditions by using gl_spin to cover the gap between testing for the LM_FLAG_NOEXP and the GL_FROZEN flag. Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 46 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 2b3d8f8a8393..9adf8f924e08 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1063,6 +1063,9 @@ int gfs2_glock_nq(struct gfs2_holder *gh) spin_lock(&gl->gl_spin); add_to_queue(gh); + if ((LM_FLAG_NOEXP & gh->gh_flags) && + test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) + set_bit(GLF_REPLY_PENDING, &gl->gl_flags); run_queue(gl, 1); spin_unlock(&gl->gl_spin); @@ -1319,6 +1322,36 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) gfs2_glock_put(gl); } +/** + * gfs2_should_freeze - Figure out if glock should be frozen + * @gl: The glock in question + * + * Glocks are not frozen if (a) the result of the dlm operation is + * an error, (b) the locking operation was an unlock operation or + * (c) if there is a "noexp" flagged request anywhere in the queue + * + * Returns: 1 if freezing should occur, 0 otherwise + */ + +static int gfs2_should_freeze(const struct gfs2_glock *gl) +{ + const struct gfs2_holder *gh; + + if (gl->gl_reply & ~LM_OUT_ST_MASK) + return 0; + if (gl->gl_target == LM_ST_UNLOCKED) + return 0; + + list_for_each_entry(gh, &gl->gl_holders, gh_list) { + if (test_bit(HIF_HOLDER, &gh->gh_iflags)) + continue; + if (LM_FLAG_NOEXP & gh->gh_flags) + return 0; + } + + return 1; +} + /** * gfs2_glock_complete - Callback used by locking * @gl: Pointer to the glock @@ -1329,18 +1362,17 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) void gfs2_glock_complete(struct gfs2_glock *gl, int ret) { struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; + gl->gl_reply = ret; + if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) { - struct gfs2_holder *gh; spin_lock(&gl->gl_spin); - gh = find_first_waiter(gl); - if ((!(gh && (gh->gh_flags & LM_FLAG_NOEXP)) && - (gl->gl_target != LM_ST_UNLOCKED)) || - ((ret & ~LM_OUT_ST_MASK) != 0)) + if (gfs2_should_freeze(gl)) { set_bit(GLF_FROZEN, &gl->gl_flags); - spin_unlock(&gl->gl_spin); - if (test_bit(GLF_FROZEN, &gl->gl_flags)) + spin_unlock(&gl->gl_spin); return; + } + spin_unlock(&gl->gl_spin); } set_bit(GLF_REPLY_PENDING, &gl->gl_flags); gfs2_glock_hold(gl);