From 936aeb5c4a9fa799abd7d630a94223acedcaad50 Mon Sep 17 00:00:00 2001 From: Henry C Chang Date: Wed, 22 Sep 2010 20:21:17 -0700 Subject: [PATCH 1/6] ceph: fix list_add usage on unsafe_writes list Fix argument order. Signed-off-by: Henry C Chang Signed-off-by: Sage Weil --- fs/ceph/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 8c044a4f0457..66e4da6dba22 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -697,7 +697,7 @@ more: * start_request so that a tid has been assigned. */ spin_lock(&ci->i_unsafe_lock); - list_add(&ci->i_unsafe_writes, &req->r_unsafe_item); + list_add(&req->r_unsafe_item, &ci->i_unsafe_writes); spin_unlock(&ci->i_unsafe_lock); ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); } From 6bc18876ba01fd4a077db6e1ed27201e4bda8864 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 27 Sep 2010 10:18:52 -0700 Subject: [PATCH 2/6] ceph: avoid null deref in osd request error path If we interrupt an osd request, we call __cancel_request, but it wasn't verifying that req->r_osd was non-NULL before dereferencing it. This could cause a crash if osds were flapping and we aborted a request on said osd. Reported-by: Henry C Chang Signed-off-by: Sage Weil --- fs/ceph/osd_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index dfced1dacbcd..3b5571b8ce22 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c @@ -549,7 +549,7 @@ static void __unregister_request(struct ceph_osd_client *osdc, */ static void __cancel_request(struct ceph_osd_request *req) { - if (req->r_sent) { + if (req->r_sent && req->r_osd) { ceph_con_revoke(&req->r_osd->o_con, req->r_request); req->r_sent = 0; } From 92923dcbfcad107b0e0469f579a2455729ccf10e Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 5 Oct 2010 16:03:41 +0530 Subject: [PATCH 3/6] ceph: Fix return value of encode_fh function encode_fh function should return 255 on error as done by other file system to indicate EOVERFLOW. Also max_len is in sizeof(u32) units and not in bytes. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Sage Weil --- fs/ceph/export.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 4480cb1c63e7..387c5823944e 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -42,32 +42,34 @@ struct ceph_nfs_confh { static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, int connectable) { + int type; struct ceph_nfs_fh *fh = (void *)rawfh; struct ceph_nfs_confh *cfh = (void *)rawfh; struct dentry *parent = dentry->d_parent; struct inode *inode = dentry->d_inode; - int type; + int connected_handle_length = sizeof(*cfh)/4; + int handle_length = sizeof(*fh)/4; /* don't re-export snaps */ if (ceph_snap(inode) != CEPH_NOSNAP) return -EINVAL; - if (*max_len >= sizeof(*cfh)) { + if (*max_len >= connected_handle_length) { dout("encode_fh %p connectable\n", dentry); cfh->ino = ceph_ino(dentry->d_inode); cfh->parent_ino = ceph_ino(parent->d_inode); cfh->parent_name_hash = parent->d_name.hash; - *max_len = sizeof(*cfh); + *max_len = connected_handle_length; type = 2; - } else if (*max_len > sizeof(*fh)) { + } else if (*max_len >= handle_length) { if (connectable) - return -ENOSPC; + return 255; dout("encode_fh %p\n", dentry); fh->ino = ceph_ino(dentry->d_inode); - *max_len = sizeof(*fh); + *max_len = handle_length; type = 1; } else { - return -ENOSPC; + return 255; } return type; } From bba0cd0e3d97472855840af817b766e3f632a501 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 5 Oct 2010 16:03:42 +0530 Subject: [PATCH 4/6] ceph: Update max_len with minimum required size encode_fh on error should update max_len with minimum required size, so that caller can redo the call with the reallocated buffer. This is required with open by handle patch series Signed-off-by: Aneesh Kumar K.V Signed-off-by: Sage Weil --- fs/ceph/export.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 387c5823944e..e38423e82f2e 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -62,13 +62,16 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, *max_len = connected_handle_length; type = 2; } else if (*max_len >= handle_length) { - if (connectable) + if (connectable) { + *max_len = connected_handle_length; return 255; + } dout("encode_fh %p\n", dentry); fh->ino = ceph_ino(dentry->d_inode); *max_len = handle_length; type = 1; } else { + *max_len = handle_length; return 255; } return type; From 21b559de56695d36b3f0819b7e2454737db254f8 Mon Sep 17 00:00:00 2001 From: Greg Farnum Date: Wed, 6 Oct 2010 15:46:30 -0700 Subject: [PATCH 5/6] ceph: send cap release message early on failed revoke. If an MDS tries to revoke caps that we don't have, we want to send releases early since they probably contain the caps message the MDS is looking for. Previously, we only sent the messages if we didn't have the inode either. But in a multi-mds system we can retain the inode after dropping all caps for a single MDS. Signed-off-by: Greg Farnum Signed-off-by: Sage Weil --- fs/ceph/caps.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 73c153092f72..97de325a49f8 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2774,15 +2774,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, if (op == CEPH_CAP_OP_IMPORT) __queue_cap_release(session, vino.ino, cap_id, mseq, seq); - - /* - * send any full release message to try to move things - * along for the mds (who clearly thinks we still have this - * cap). - */ - ceph_add_cap_releases(mdsc, session); - ceph_send_cap_releases(mdsc, session); - goto done; + goto flush_cap_releases; } /* these will work even if we don't have a cap yet */ @@ -2810,7 +2802,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, dout(" no cap on %p ino %llx.%llx from mds%d\n", inode, ceph_ino(inode), ceph_snap(inode), mds); spin_unlock(&inode->i_lock); - goto done; + goto flush_cap_releases; } /* note that each of these drops i_lock for us */ @@ -2834,6 +2826,17 @@ void ceph_handle_caps(struct ceph_mds_session *session, ceph_cap_op_name(op)); } + goto done; + +flush_cap_releases: + /* + * send any full release message to try to move things + * along for the mds (who clearly thinks we still have this + * cap). + */ + ceph_add_cap_releases(mdsc, session); + ceph_send_cap_releases(mdsc, session); + done: mutex_unlock(&session->s_mutex); done_unlocked: From d91f2438d881514e4a923fd786dbd94b764a9440 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 22 Sep 2010 11:16:00 -0700 Subject: [PATCH 6/6] ceph: update issue_seq on cap grant We need to update the issue_seq on any grant operation, be it via an MDS reply or a separate grant message. The update in the grant path was missing. This broke cap release for inodes in which the MDS sent an explicit grant message that was not soon after followed by a successful MDS reply on the same inode. Also fix the signedness on seq locals. Signed-off-by: Sage Weil --- fs/ceph/caps.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 97de325a49f8..5e9da996a151 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2283,7 +2283,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, { struct ceph_inode_info *ci = ceph_inode(inode); int mds = session->s_mds; - int seq = le32_to_cpu(grant->seq); + unsigned seq = le32_to_cpu(grant->seq); + unsigned issue_seq = le32_to_cpu(grant->issue_seq); int newcaps = le32_to_cpu(grant->caps); int issued, implemented, used, wanted, dirty; u64 size = le64_to_cpu(grant->size); @@ -2295,8 +2296,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, int revoked_rdcache = 0; int queue_invalidate = 0; - dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", - inode, cap, mds, seq, ceph_cap_string(newcaps)); + dout("handle_cap_grant inode %p cap %p mds%d seq %u/%u %s\n", + inode, cap, mds, seq, issue_seq, ceph_cap_string(newcaps)); dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, inode->i_size); @@ -2392,6 +2393,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, } cap->seq = seq; + cap->issue_seq = issue_seq; /* file layout may have changed */ ci->i_layout = grant->layout;