Merge branch 'flexfiles'
* flexfiles: (53 commits)
  pnfs: lookup new lseg at lseg boundary
  nfs41: .init_read and .init_write can be called with valid pg_lseg
  pnfs: Update documentation on the Layout Drivers
  pnfs/flexfiles: Add the FlexFile Layout Driver
  nfs: count DIO good bytes correctly with mirroring
  nfs41: wait for LAYOUTRETURN before retrying LAYOUTGET
  nfs: add a helper to set NFS_ODIRECT_RESCHED_WRITES to direct writes
  nfs41: add NFS_LAYOUT_RETRY_LAYOUTGET to layout header flags
  nfs/flexfiles: send layoutreturn before freeing lseg
  nfs41: introduce NFS_LAYOUT_RETURN_BEFORE_CLOSE
  nfs41: allow async version layoutreturn
  nfs41: add range to layoutreturn args
  pnfs: allow LD to ask to resend read through pnfs
  nfs: add nfs_pgio_current_mirror helper
  nfs: only reset desc->pg_mirror_idx when mirroring is supported
  nfs41: add a debug warning if we destroy an unempty layout
  pnfs: fail comparison when bucket verifier not set
  nfs: mirroring support for direct io
  nfs: add mirroring support to pgio layer
  pnfs: pass ds_commit_idx through the commit path
  ...

Conflicts:
	fs/nfs/pnfs.c
	fs/nfs/pnfs.h
This commit is contained in:
		
						commit
						e2c63e091e
					
				| @ -57,15 +57,16 @@ bit is set, preventing any new lsegs from being added. | |||||||
| layout drivers | layout drivers | ||||||
| -------------- | -------------- | ||||||
| 
 | 
 | ||||||
| PNFS utilizes what is called layout drivers. The STD defines 3 basic | PNFS utilizes what is called layout drivers. The STD defines 4 basic | ||||||
| layout types: "files" "objects" and "blocks". For each of these types | layout types: "files", "objects", "blocks", and "flexfiles". For each | ||||||
| there is a layout-driver with a common function-vectors table which | of these types there is a layout-driver with a common function-vectors | ||||||
| are called by the nfs-client pnfs-core to implement the different layout | table which are called by the nfs-client pnfs-core to implement the | ||||||
| types. | different layout types. | ||||||
| 
 | 
 | ||||||
| Files-layout-driver code is in: fs/nfs/nfs4filelayout.c && nfs4filelayoutdev.c | Files-layout-driver code is in: fs/nfs/filelayout/.. directory | ||||||
| Objects-layout-driver code is in: fs/nfs/objlayout/.. directory | Objects-layout-driver code is in: fs/nfs/objlayout/.. directory | ||||||
| Blocks-layout-driver code is in: fs/nfs/blocklayout/.. directory | Blocks-layout-driver code is in: fs/nfs/blocklayout/.. directory | ||||||
|  | Flexfiles-layout-driver code is in: fs/nfs/flexfilelayout/.. directory | ||||||
| 
 | 
 | ||||||
| objects-layout setup | objects-layout setup | ||||||
| -------------------- | -------------------- | ||||||
|  | |||||||
| @ -128,6 +128,11 @@ config PNFS_OBJLAYOUT | |||||||
| 	depends on NFS_V4_1 && SCSI_OSD_ULD | 	depends on NFS_V4_1 && SCSI_OSD_ULD | ||||||
| 	default NFS_V4 | 	default NFS_V4 | ||||||
| 
 | 
 | ||||||
|  | config PNFS_FLEXFILE_LAYOUT | ||||||
|  | 	tristate | ||||||
|  | 	depends on NFS_V4_1 && NFS_V3 | ||||||
|  | 	default m | ||||||
|  | 
 | ||||||
| config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN | config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN | ||||||
| 	string "NFSv4.1 Implementation ID Domain" | 	string "NFSv4.1 Implementation ID Domain" | ||||||
| 	depends on NFS_V4_1 | 	depends on NFS_V4_1 | ||||||
|  | |||||||
| @ -27,9 +27,10 @@ nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o | |||||||
| 	  dns_resolve.o nfs4trace.o | 	  dns_resolve.o nfs4trace.o | ||||||
| nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o | nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o | ||||||
| nfsv4-$(CONFIG_SYSCTL)	+= nfs4sysctl.o | nfsv4-$(CONFIG_SYSCTL)	+= nfs4sysctl.o | ||||||
| nfsv4-$(CONFIG_NFS_V4_1)	+= pnfs.o pnfs_dev.o | nfsv4-$(CONFIG_NFS_V4_1)	+= pnfs.o pnfs_dev.o pnfs_nfs.o | ||||||
| nfsv4-$(CONFIG_NFS_V4_2)	+= nfs42proc.o | nfsv4-$(CONFIG_NFS_V4_2)	+= nfs42proc.o | ||||||
| 
 | 
 | ||||||
| obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/ | obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/ | ||||||
| obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/ | obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/ | ||||||
| obj-$(CONFIG_PNFS_BLOCK) += blocklayout/ | obj-$(CONFIG_PNFS_BLOCK) += blocklayout/ | ||||||
|  | obj-$(CONFIG_PNFS_FLEXFILE_LAYOUT) += flexfilelayout/ | ||||||
|  | |||||||
| @ -860,12 +860,14 @@ static const struct nfs_pageio_ops bl_pg_read_ops = { | |||||||
| 	.pg_init = bl_pg_init_read, | 	.pg_init = bl_pg_init_read, | ||||||
| 	.pg_test = bl_pg_test_read, | 	.pg_test = bl_pg_test_read, | ||||||
| 	.pg_doio = pnfs_generic_pg_readpages, | 	.pg_doio = pnfs_generic_pg_readpages, | ||||||
|  | 	.pg_cleanup = pnfs_generic_pg_cleanup, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static const struct nfs_pageio_ops bl_pg_write_ops = { | static const struct nfs_pageio_ops bl_pg_write_ops = { | ||||||
| 	.pg_init = bl_pg_init_write, | 	.pg_init = bl_pg_init_write, | ||||||
| 	.pg_test = bl_pg_test_write, | 	.pg_test = bl_pg_test_write, | ||||||
| 	.pg_doio = pnfs_generic_pg_writepages, | 	.pg_doio = pnfs_generic_pg_writepages, | ||||||
|  | 	.pg_cleanup = pnfs_generic_pg_cleanup, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static struct pnfs_layoutdriver_type blocklayout_type = { | static struct pnfs_layoutdriver_type blocklayout_type = { | ||||||
|  | |||||||
							
								
								
									
										112
									
								
								fs/nfs/direct.c
									
									
									
									
									
								
							
							
						
						
									
										112
									
								
								fs/nfs/direct.c
									
									
									
									
									
								
							| @ -66,6 +66,10 @@ static struct kmem_cache *nfs_direct_cachep; | |||||||
| /*
 | /*
 | ||||||
|  * This represents a set of asynchronous requests that we're waiting on |  * This represents a set of asynchronous requests that we're waiting on | ||||||
|  */ |  */ | ||||||
|  | struct nfs_direct_mirror { | ||||||
|  | 	ssize_t count; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| struct nfs_direct_req { | struct nfs_direct_req { | ||||||
| 	struct kref		kref;		/* release manager */ | 	struct kref		kref;		/* release manager */ | ||||||
| 
 | 
 | ||||||
| @ -78,8 +82,13 @@ struct nfs_direct_req { | |||||||
| 	/* completion state */ | 	/* completion state */ | ||||||
| 	atomic_t		io_count;	/* i/os we're waiting for */ | 	atomic_t		io_count;	/* i/os we're waiting for */ | ||||||
| 	spinlock_t		lock;		/* protect completion state */ | 	spinlock_t		lock;		/* protect completion state */ | ||||||
|  | 
 | ||||||
|  | 	struct nfs_direct_mirror mirrors[NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX]; | ||||||
|  | 	int			mirror_count; | ||||||
|  | 
 | ||||||
| 	ssize_t			count,		/* bytes actually processed */ | 	ssize_t			count,		/* bytes actually processed */ | ||||||
| 				bytes_left,	/* bytes left to be sent */ | 				bytes_left,	/* bytes left to be sent */ | ||||||
|  | 				io_start,	/* start of IO */ | ||||||
| 				error;		/* any reported error */ | 				error;		/* any reported error */ | ||||||
| 	struct completion	completion;	/* wait for i/o completion */ | 	struct completion	completion;	/* wait for i/o completion */ | ||||||
| 
 | 
 | ||||||
| @ -108,26 +117,56 @@ static inline int put_dreq(struct nfs_direct_req *dreq) | |||||||
| 	return atomic_dec_and_test(&dreq->io_count); | 	return atomic_dec_and_test(&dreq->io_count); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq) | ||||||
|  | { | ||||||
|  | 	dreq->flags = NFS_ODIRECT_RESCHED_WRITES; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL_GPL(nfs_direct_set_resched_writes); | ||||||
|  | 
 | ||||||
|  | static void | ||||||
|  | nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr) | ||||||
|  | { | ||||||
|  | 	int i; | ||||||
|  | 	ssize_t count; | ||||||
|  | 
 | ||||||
|  | 	WARN_ON_ONCE(hdr->pgio_mirror_idx >= dreq->mirror_count); | ||||||
|  | 
 | ||||||
|  | 	count = dreq->mirrors[hdr->pgio_mirror_idx].count; | ||||||
|  | 	if (count + dreq->io_start < hdr->io_start + hdr->good_bytes) { | ||||||
|  | 		count = hdr->io_start + hdr->good_bytes - dreq->io_start; | ||||||
|  | 		dreq->mirrors[hdr->pgio_mirror_idx].count = count; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	/* update the dreq->count by finding the minimum agreed count from all
 | ||||||
|  | 	 * mirrors */ | ||||||
|  | 	count = dreq->mirrors[0].count; | ||||||
|  | 
 | ||||||
|  | 	for (i = 1; i < dreq->mirror_count; i++) | ||||||
|  | 		count = min(count, dreq->mirrors[i].count); | ||||||
|  | 
 | ||||||
|  | 	dreq->count = count; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * nfs_direct_select_verf - select the right verifier |  * nfs_direct_select_verf - select the right verifier | ||||||
|  * @dreq - direct request possibly spanning multiple servers |  * @dreq - direct request possibly spanning multiple servers | ||||||
|  * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs |  * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs | ||||||
|  * @ds_idx - index of data server in data server list, only valid if ds_clp set |  * @commit_idx - commit bucket index for the DS | ||||||
|  * |  * | ||||||
|  * returns the correct verifier to use given the role of the server |  * returns the correct verifier to use given the role of the server | ||||||
|  */ |  */ | ||||||
| static struct nfs_writeverf * | static struct nfs_writeverf * | ||||||
| nfs_direct_select_verf(struct nfs_direct_req *dreq, | nfs_direct_select_verf(struct nfs_direct_req *dreq, | ||||||
| 		       struct nfs_client *ds_clp, | 		       struct nfs_client *ds_clp, | ||||||
| 		       int ds_idx) | 		       int commit_idx) | ||||||
| { | { | ||||||
| 	struct nfs_writeverf *verfp = &dreq->verf; | 	struct nfs_writeverf *verfp = &dreq->verf; | ||||||
| 
 | 
 | ||||||
| #ifdef CONFIG_NFS_V4_1 | #ifdef CONFIG_NFS_V4_1 | ||||||
| 	if (ds_clp) { | 	if (ds_clp) { | ||||||
| 		/* pNFS is in use, use the DS verf */ | 		/* pNFS is in use, use the DS verf */ | ||||||
| 		if (ds_idx >= 0 && ds_idx < dreq->ds_cinfo.nbuckets) | 		if (commit_idx >= 0 && commit_idx < dreq->ds_cinfo.nbuckets) | ||||||
| 			verfp = &dreq->ds_cinfo.buckets[ds_idx].direct_verf; | 			verfp = &dreq->ds_cinfo.buckets[commit_idx].direct_verf; | ||||||
| 		else | 		else | ||||||
| 			WARN_ON_ONCE(1); | 			WARN_ON_ONCE(1); | ||||||
| 	} | 	} | ||||||
| @ -148,8 +187,7 @@ static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq, | |||||||
| { | { | ||||||
| 	struct nfs_writeverf *verfp; | 	struct nfs_writeverf *verfp; | ||||||
| 
 | 
 | ||||||
| 	verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, | 	verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, hdr->ds_commit_idx); | ||||||
| 				      hdr->ds_idx); |  | ||||||
| 	WARN_ON_ONCE(verfp->committed >= 0); | 	WARN_ON_ONCE(verfp->committed >= 0); | ||||||
| 	memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); | 	memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); | ||||||
| 	WARN_ON_ONCE(verfp->committed < 0); | 	WARN_ON_ONCE(verfp->committed < 0); | ||||||
| @ -169,8 +207,7 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq, | |||||||
| { | { | ||||||
| 	struct nfs_writeverf *verfp; | 	struct nfs_writeverf *verfp; | ||||||
| 
 | 
 | ||||||
| 	verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, | 	verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, hdr->ds_commit_idx); | ||||||
| 					 hdr->ds_idx); |  | ||||||
| 	if (verfp->committed < 0) { | 	if (verfp->committed < 0) { | ||||||
| 		nfs_direct_set_hdr_verf(dreq, hdr); | 		nfs_direct_set_hdr_verf(dreq, hdr); | ||||||
| 		return 0; | 		return 0; | ||||||
| @ -193,7 +230,11 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, | |||||||
| 
 | 
 | ||||||
| 	verfp = nfs_direct_select_verf(dreq, data->ds_clp, | 	verfp = nfs_direct_select_verf(dreq, data->ds_clp, | ||||||
| 					 data->ds_commit_index); | 					 data->ds_commit_index); | ||||||
| 	WARN_ON_ONCE(verfp->committed < 0); | 
 | ||||||
|  | 	/* verifier not set so always fail */ | ||||||
|  | 	if (verfp->committed < 0) | ||||||
|  | 		return 1; | ||||||
|  | 
 | ||||||
| 	return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf)); | 	return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -249,6 +290,18 @@ void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, | |||||||
| 	cinfo->completion_ops = &nfs_direct_commit_completion_ops; | 	cinfo->completion_ops = &nfs_direct_commit_completion_ops; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static inline void nfs_direct_setup_mirroring(struct nfs_direct_req *dreq, | ||||||
|  | 					     struct nfs_pageio_descriptor *pgio, | ||||||
|  | 					     struct nfs_page *req) | ||||||
|  | { | ||||||
|  | 	int mirror_count = 1; | ||||||
|  | 
 | ||||||
|  | 	if (pgio->pg_ops->pg_get_mirror_count) | ||||||
|  | 		mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); | ||||||
|  | 
 | ||||||
|  | 	dreq->mirror_count = mirror_count; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static inline struct nfs_direct_req *nfs_direct_req_alloc(void) | static inline struct nfs_direct_req *nfs_direct_req_alloc(void) | ||||||
| { | { | ||||||
| 	struct nfs_direct_req *dreq; | 	struct nfs_direct_req *dreq; | ||||||
| @ -263,6 +316,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) | |||||||
| 	INIT_LIST_HEAD(&dreq->mds_cinfo.list); | 	INIT_LIST_HEAD(&dreq->mds_cinfo.list); | ||||||
| 	dreq->verf.committed = NFS_INVALID_STABLE_HOW;	/* not set yet */ | 	dreq->verf.committed = NFS_INVALID_STABLE_HOW;	/* not set yet */ | ||||||
| 	INIT_WORK(&dreq->work, nfs_direct_write_schedule_work); | 	INIT_WORK(&dreq->work, nfs_direct_write_schedule_work); | ||||||
|  | 	dreq->mirror_count = 1; | ||||||
| 	spin_lock_init(&dreq->lock); | 	spin_lock_init(&dreq->lock); | ||||||
| 
 | 
 | ||||||
| 	return dreq; | 	return dreq; | ||||||
| @ -369,7 +423,8 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) | |||||||
| 	if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0)) | 	if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0)) | ||||||
| 		dreq->error = hdr->error; | 		dreq->error = hdr->error; | ||||||
| 	else | 	else | ||||||
| 		dreq->count += hdr->good_bytes; | 		nfs_direct_good_bytes(dreq, hdr); | ||||||
|  | 
 | ||||||
| 	spin_unlock(&dreq->lock); | 	spin_unlock(&dreq->lock); | ||||||
| 
 | 
 | ||||||
| 	while (!list_empty(&hdr->pages)) { | 	while (!list_empty(&hdr->pages)) { | ||||||
| @ -547,6 +602,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, | |||||||
| 
 | 
 | ||||||
| 	dreq->inode = inode; | 	dreq->inode = inode; | ||||||
| 	dreq->bytes_left = count; | 	dreq->bytes_left = count; | ||||||
|  | 	dreq->io_start = pos; | ||||||
| 	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); | 	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); | ||||||
| 	l_ctx = nfs_get_lock_context(dreq->ctx); | 	l_ctx = nfs_get_lock_context(dreq->ctx); | ||||||
| 	if (IS_ERR(l_ctx)) { | 	if (IS_ERR(l_ctx)) { | ||||||
| @ -579,6 +635,20 @@ out: | |||||||
| 	return result; | 	return result; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static void | ||||||
|  | nfs_direct_write_scan_commit_list(struct inode *inode, | ||||||
|  | 				  struct list_head *list, | ||||||
|  | 				  struct nfs_commit_info *cinfo) | ||||||
|  | { | ||||||
|  | 	spin_lock(cinfo->lock); | ||||||
|  | #ifdef CONFIG_NFS_V4_1 | ||||||
|  | 	if (cinfo->ds != NULL && cinfo->ds->nwritten != 0) | ||||||
|  | 		NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo); | ||||||
|  | #endif | ||||||
|  | 	nfs_scan_commit_list(&cinfo->mds->list, list, cinfo, 0); | ||||||
|  | 	spin_unlock(cinfo->lock); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) | static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) | ||||||
| { | { | ||||||
| 	struct nfs_pageio_descriptor desc; | 	struct nfs_pageio_descriptor desc; | ||||||
| @ -586,20 +656,23 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) | |||||||
| 	LIST_HEAD(reqs); | 	LIST_HEAD(reqs); | ||||||
| 	struct nfs_commit_info cinfo; | 	struct nfs_commit_info cinfo; | ||||||
| 	LIST_HEAD(failed); | 	LIST_HEAD(failed); | ||||||
|  | 	int i; | ||||||
| 
 | 
 | ||||||
| 	nfs_init_cinfo_from_dreq(&cinfo, dreq); | 	nfs_init_cinfo_from_dreq(&cinfo, dreq); | ||||||
| 	pnfs_recover_commit_reqs(dreq->inode, &reqs, &cinfo); | 	nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); | ||||||
| 	spin_lock(cinfo.lock); |  | ||||||
| 	nfs_scan_commit_list(&cinfo.mds->list, &reqs, &cinfo, 0); |  | ||||||
| 	spin_unlock(cinfo.lock); |  | ||||||
| 
 | 
 | ||||||
| 	dreq->count = 0; | 	dreq->count = 0; | ||||||
|  | 	for (i = 0; i < dreq->mirror_count; i++) | ||||||
|  | 		dreq->mirrors[i].count = 0; | ||||||
| 	get_dreq(dreq); | 	get_dreq(dreq); | ||||||
| 
 | 
 | ||||||
| 	nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false, | 	nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false, | ||||||
| 			      &nfs_direct_write_completion_ops); | 			      &nfs_direct_write_completion_ops); | ||||||
| 	desc.pg_dreq = dreq; | 	desc.pg_dreq = dreq; | ||||||
| 
 | 
 | ||||||
|  | 	req = nfs_list_entry(reqs.next); | ||||||
|  | 	nfs_direct_setup_mirroring(dreq, &desc, req); | ||||||
|  | 
 | ||||||
| 	list_for_each_entry_safe(req, tmp, &reqs, wb_list) { | 	list_for_each_entry_safe(req, tmp, &reqs, wb_list) { | ||||||
| 		if (!nfs_pageio_add_request(&desc, req)) { | 		if (!nfs_pageio_add_request(&desc, req)) { | ||||||
| 			nfs_list_remove_request(req); | 			nfs_list_remove_request(req); | ||||||
| @ -646,7 +719,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) | |||||||
| 		nfs_list_remove_request(req); | 		nfs_list_remove_request(req); | ||||||
| 		if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) { | 		if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) { | ||||||
| 			/* Note the rewrite will go through mds */ | 			/* Note the rewrite will go through mds */ | ||||||
| 			nfs_mark_request_commit(req, NULL, &cinfo); | 			nfs_mark_request_commit(req, NULL, &cinfo, 0); | ||||||
| 		} else | 		} else | ||||||
| 			nfs_release_request(req); | 			nfs_release_request(req); | ||||||
| 		nfs_unlock_and_release_request(req); | 		nfs_unlock_and_release_request(req); | ||||||
| @ -721,7 +794,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) | |||||||
| 		dreq->error = hdr->error; | 		dreq->error = hdr->error; | ||||||
| 	} | 	} | ||||||
| 	if (dreq->error == 0) { | 	if (dreq->error == 0) { | ||||||
| 		dreq->count += hdr->good_bytes; | 		nfs_direct_good_bytes(dreq, hdr); | ||||||
| 		if (nfs_write_need_commit(hdr)) { | 		if (nfs_write_need_commit(hdr)) { | ||||||
| 			if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) | 			if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) | ||||||
| 				request_commit = true; | 				request_commit = true; | ||||||
| @ -745,7 +818,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) | |||||||
| 		nfs_list_remove_request(req); | 		nfs_list_remove_request(req); | ||||||
| 		if (request_commit) { | 		if (request_commit) { | ||||||
| 			kref_get(&req->wb_kref); | 			kref_get(&req->wb_kref); | ||||||
| 			nfs_mark_request_commit(req, hdr->lseg, &cinfo); | 			nfs_mark_request_commit(req, hdr->lseg, &cinfo, | ||||||
|  | 				hdr->ds_commit_idx); | ||||||
| 		} | 		} | ||||||
| 		nfs_unlock_and_release_request(req); | 		nfs_unlock_and_release_request(req); | ||||||
| 	} | 	} | ||||||
| @ -826,6 +900,9 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, | |||||||
| 				result = PTR_ERR(req); | 				result = PTR_ERR(req); | ||||||
| 				break; | 				break; | ||||||
| 			} | 			} | ||||||
|  | 
 | ||||||
|  | 			nfs_direct_setup_mirroring(dreq, &desc, req); | ||||||
|  | 
 | ||||||
| 			nfs_lock_request(req); | 			nfs_lock_request(req); | ||||||
| 			req->wb_index = pos >> PAGE_SHIFT; | 			req->wb_index = pos >> PAGE_SHIFT; | ||||||
| 			req->wb_offset = pos & ~PAGE_MASK; | 			req->wb_offset = pos & ~PAGE_MASK; | ||||||
| @ -934,6 +1011,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, | |||||||
| 
 | 
 | ||||||
| 	dreq->inode = inode; | 	dreq->inode = inode; | ||||||
| 	dreq->bytes_left = count; | 	dreq->bytes_left = count; | ||||||
|  | 	dreq->io_start = pos; | ||||||
| 	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); | 	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); | ||||||
| 	l_ctx = nfs_get_lock_context(dreq->ctx); | 	l_ctx = nfs_get_lock_context(dreq->ctx); | ||||||
| 	if (IS_ERR(l_ctx)) { | 	if (IS_ERR(l_ctx)) { | ||||||
|  | |||||||
| @ -118,13 +118,6 @@ static void filelayout_reset_read(struct nfs_pgio_header *hdr) | |||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo) |  | ||||||
| { |  | ||||||
| 	if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) |  | ||||||
| 		return; |  | ||||||
| 	pnfs_return_layout(inode); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static int filelayout_async_handle_error(struct rpc_task *task, | static int filelayout_async_handle_error(struct rpc_task *task, | ||||||
| 					 struct nfs4_state *state, | 					 struct nfs4_state *state, | ||||||
| 					 struct nfs_client *clp, | 					 struct nfs_client *clp, | ||||||
| @ -207,7 +200,7 @@ static int filelayout_async_handle_error(struct rpc_task *task, | |||||||
| 		dprintk("%s DS connection error %d\n", __func__, | 		dprintk("%s DS connection error %d\n", __func__, | ||||||
| 			task->tk_status); | 			task->tk_status); | ||||||
| 		nfs4_mark_deviceid_unavailable(devid); | 		nfs4_mark_deviceid_unavailable(devid); | ||||||
| 		set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); | 		pnfs_error_mark_layout_for_return(inode, lseg); | ||||||
| 		rpc_wake_up(&tbl->slot_tbl_waitq); | 		rpc_wake_up(&tbl->slot_tbl_waitq); | ||||||
| 		/* fall through */ | 		/* fall through */ | ||||||
| 	default: | 	default: | ||||||
| @ -339,16 +332,6 @@ static void filelayout_read_count_stats(struct rpc_task *task, void *data) | |||||||
| 	rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics); | 	rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void filelayout_read_release(void *data) |  | ||||||
| { |  | ||||||
| 	struct nfs_pgio_header *hdr = data; |  | ||||||
| 	struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout; |  | ||||||
| 
 |  | ||||||
| 	filelayout_fenceme(lo->plh_inode, lo); |  | ||||||
| 	nfs_put_client(hdr->ds_clp); |  | ||||||
| 	hdr->mds_ops->rpc_release(data); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static int filelayout_write_done_cb(struct rpc_task *task, | static int filelayout_write_done_cb(struct rpc_task *task, | ||||||
| 				struct nfs_pgio_header *hdr) | 				struct nfs_pgio_header *hdr) | ||||||
| { | { | ||||||
| @ -371,17 +354,6 @@ static int filelayout_write_done_cb(struct rpc_task *task, | |||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /* Fake up some data that will cause nfs_commit_release to retry the writes. */ |  | ||||||
| static void prepare_to_resend_writes(struct nfs_commit_data *data) |  | ||||||
| { |  | ||||||
| 	struct nfs_page *first = nfs_list_entry(data->pages.next); |  | ||||||
| 
 |  | ||||||
| 	data->task.tk_status = 0; |  | ||||||
| 	memcpy(&data->verf.verifier, &first->wb_verf, |  | ||||||
| 	       sizeof(data->verf.verifier)); |  | ||||||
| 	data->verf.verifier.data[0]++; /* ensure verifier mismatch */ |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static int filelayout_commit_done_cb(struct rpc_task *task, | static int filelayout_commit_done_cb(struct rpc_task *task, | ||||||
| 				     struct nfs_commit_data *data) | 				     struct nfs_commit_data *data) | ||||||
| { | { | ||||||
| @ -393,7 +365,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task, | |||||||
| 
 | 
 | ||||||
| 	switch (err) { | 	switch (err) { | ||||||
| 	case -NFS4ERR_RESET_TO_MDS: | 	case -NFS4ERR_RESET_TO_MDS: | ||||||
| 		prepare_to_resend_writes(data); | 		pnfs_generic_prepare_to_resend_writes(data); | ||||||
| 		return -EAGAIN; | 		return -EAGAIN; | ||||||
| 	case -EAGAIN: | 	case -EAGAIN: | ||||||
| 		rpc_restart_call_prepare(task); | 		rpc_restart_call_prepare(task); | ||||||
| @ -451,16 +423,6 @@ static void filelayout_write_count_stats(struct rpc_task *task, void *data) | |||||||
| 	rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics); | 	rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void filelayout_write_release(void *data) |  | ||||||
| { |  | ||||||
| 	struct nfs_pgio_header *hdr = data; |  | ||||||
| 	struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout; |  | ||||||
| 
 |  | ||||||
| 	filelayout_fenceme(lo->plh_inode, lo); |  | ||||||
| 	nfs_put_client(hdr->ds_clp); |  | ||||||
| 	hdr->mds_ops->rpc_release(data); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static void filelayout_commit_prepare(struct rpc_task *task, void *data) | static void filelayout_commit_prepare(struct rpc_task *task, void *data) | ||||||
| { | { | ||||||
| 	struct nfs_commit_data *wdata = data; | 	struct nfs_commit_data *wdata = data; | ||||||
| @ -471,14 +433,6 @@ static void filelayout_commit_prepare(struct rpc_task *task, void *data) | |||||||
| 			task); | 			task); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void filelayout_write_commit_done(struct rpc_task *task, void *data) |  | ||||||
| { |  | ||||||
| 	struct nfs_commit_data *wdata = data; |  | ||||||
| 
 |  | ||||||
| 	/* Note this may cause RPC to be resent */ |  | ||||||
| 	wdata->mds_ops->rpc_call_done(task, data); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static void filelayout_commit_count_stats(struct rpc_task *task, void *data) | static void filelayout_commit_count_stats(struct rpc_task *task, void *data) | ||||||
| { | { | ||||||
| 	struct nfs_commit_data *cdata = data; | 	struct nfs_commit_data *cdata = data; | ||||||
| @ -486,35 +440,25 @@ static void filelayout_commit_count_stats(struct rpc_task *task, void *data) | |||||||
| 	rpc_count_iostats(task, NFS_SERVER(cdata->inode)->client->cl_metrics); | 	rpc_count_iostats(task, NFS_SERVER(cdata->inode)->client->cl_metrics); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void filelayout_commit_release(void *calldata) |  | ||||||
| { |  | ||||||
| 	struct nfs_commit_data *data = calldata; |  | ||||||
| 
 |  | ||||||
| 	data->completion_ops->completion(data); |  | ||||||
| 	pnfs_put_lseg(data->lseg); |  | ||||||
| 	nfs_put_client(data->ds_clp); |  | ||||||
| 	nfs_commitdata_release(data); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static const struct rpc_call_ops filelayout_read_call_ops = { | static const struct rpc_call_ops filelayout_read_call_ops = { | ||||||
| 	.rpc_call_prepare = filelayout_read_prepare, | 	.rpc_call_prepare = filelayout_read_prepare, | ||||||
| 	.rpc_call_done = filelayout_read_call_done, | 	.rpc_call_done = filelayout_read_call_done, | ||||||
| 	.rpc_count_stats = filelayout_read_count_stats, | 	.rpc_count_stats = filelayout_read_count_stats, | ||||||
| 	.rpc_release = filelayout_read_release, | 	.rpc_release = pnfs_generic_rw_release, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static const struct rpc_call_ops filelayout_write_call_ops = { | static const struct rpc_call_ops filelayout_write_call_ops = { | ||||||
| 	.rpc_call_prepare = filelayout_write_prepare, | 	.rpc_call_prepare = filelayout_write_prepare, | ||||||
| 	.rpc_call_done = filelayout_write_call_done, | 	.rpc_call_done = filelayout_write_call_done, | ||||||
| 	.rpc_count_stats = filelayout_write_count_stats, | 	.rpc_count_stats = filelayout_write_count_stats, | ||||||
| 	.rpc_release = filelayout_write_release, | 	.rpc_release = pnfs_generic_rw_release, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static const struct rpc_call_ops filelayout_commit_call_ops = { | static const struct rpc_call_ops filelayout_commit_call_ops = { | ||||||
| 	.rpc_call_prepare = filelayout_commit_prepare, | 	.rpc_call_prepare = filelayout_commit_prepare, | ||||||
| 	.rpc_call_done = filelayout_write_commit_done, | 	.rpc_call_done = pnfs_generic_write_commit_done, | ||||||
| 	.rpc_count_stats = filelayout_commit_count_stats, | 	.rpc_count_stats = filelayout_commit_count_stats, | ||||||
| 	.rpc_release = filelayout_commit_release, | 	.rpc_release = pnfs_generic_commit_release, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static enum pnfs_try_status | static enum pnfs_try_status | ||||||
| @ -548,7 +492,7 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr) | |||||||
| 	/* No multipath support. Use first DS */ | 	/* No multipath support. Use first DS */ | ||||||
| 	atomic_inc(&ds->ds_clp->cl_count); | 	atomic_inc(&ds->ds_clp->cl_count); | ||||||
| 	hdr->ds_clp = ds->ds_clp; | 	hdr->ds_clp = ds->ds_clp; | ||||||
| 	hdr->ds_idx = idx; | 	hdr->ds_commit_idx = idx; | ||||||
| 	fh = nfs4_fl_select_ds_fh(lseg, j); | 	fh = nfs4_fl_select_ds_fh(lseg, j); | ||||||
| 	if (fh) | 	if (fh) | ||||||
| 		hdr->args.fh = fh; | 		hdr->args.fh = fh; | ||||||
| @ -557,8 +501,9 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr) | |||||||
| 	hdr->mds_offset = offset; | 	hdr->mds_offset = offset; | ||||||
| 
 | 
 | ||||||
| 	/* Perform an asynchronous read to ds */ | 	/* Perform an asynchronous read to ds */ | ||||||
| 	nfs_initiate_pgio(ds_clnt, hdr, | 	nfs_initiate_pgio(ds_clnt, hdr, hdr->cred, | ||||||
| 			    &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN); | 			  NFS_PROTO(hdr->inode), &filelayout_read_call_ops, | ||||||
|  | 			  0, RPC_TASK_SOFTCONN); | ||||||
| 	return PNFS_ATTEMPTED; | 	return PNFS_ATTEMPTED; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -591,16 +536,16 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync) | |||||||
| 	hdr->pgio_done_cb = filelayout_write_done_cb; | 	hdr->pgio_done_cb = filelayout_write_done_cb; | ||||||
| 	atomic_inc(&ds->ds_clp->cl_count); | 	atomic_inc(&ds->ds_clp->cl_count); | ||||||
| 	hdr->ds_clp = ds->ds_clp; | 	hdr->ds_clp = ds->ds_clp; | ||||||
| 	hdr->ds_idx = idx; | 	hdr->ds_commit_idx = idx; | ||||||
| 	fh = nfs4_fl_select_ds_fh(lseg, j); | 	fh = nfs4_fl_select_ds_fh(lseg, j); | ||||||
| 	if (fh) | 	if (fh) | ||||||
| 		hdr->args.fh = fh; | 		hdr->args.fh = fh; | ||||||
| 	hdr->args.offset = filelayout_get_dserver_offset(lseg, offset); | 	hdr->args.offset = filelayout_get_dserver_offset(lseg, offset); | ||||||
| 
 | 
 | ||||||
| 	/* Perform an asynchronous write */ | 	/* Perform an asynchronous write */ | ||||||
| 	nfs_initiate_pgio(ds_clnt, hdr, | 	nfs_initiate_pgio(ds_clnt, hdr, hdr->cred, | ||||||
| 				    &filelayout_write_call_ops, sync, | 			  NFS_PROTO(hdr->inode), &filelayout_write_call_ops, | ||||||
| 				    RPC_TASK_SOFTCONN); | 			  sync, RPC_TASK_SOFTCONN); | ||||||
| 	return PNFS_ATTEMPTED; | 	return PNFS_ATTEMPTED; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -988,12 +933,14 @@ static const struct nfs_pageio_ops filelayout_pg_read_ops = { | |||||||
| 	.pg_init = filelayout_pg_init_read, | 	.pg_init = filelayout_pg_init_read, | ||||||
| 	.pg_test = filelayout_pg_test, | 	.pg_test = filelayout_pg_test, | ||||||
| 	.pg_doio = pnfs_generic_pg_readpages, | 	.pg_doio = pnfs_generic_pg_readpages, | ||||||
|  | 	.pg_cleanup = pnfs_generic_pg_cleanup, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static const struct nfs_pageio_ops filelayout_pg_write_ops = { | static const struct nfs_pageio_ops filelayout_pg_write_ops = { | ||||||
| 	.pg_init = filelayout_pg_init_write, | 	.pg_init = filelayout_pg_init_write, | ||||||
| 	.pg_test = filelayout_pg_test, | 	.pg_test = filelayout_pg_test, | ||||||
| 	.pg_doio = pnfs_generic_pg_writepages, | 	.pg_doio = pnfs_generic_pg_writepages, | ||||||
|  | 	.pg_cleanup = pnfs_generic_pg_cleanup, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) | static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) | ||||||
| @ -1004,37 +951,11 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) | |||||||
| 		return j; | 		return j; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /* The generic layer is about to remove the req from the commit list.
 |  | ||||||
|  * If this will make the bucket empty, it will need to put the lseg reference. |  | ||||||
|  * Note this is must be called holding the inode (/cinfo) lock |  | ||||||
|  */ |  | ||||||
| static void |  | ||||||
| filelayout_clear_request_commit(struct nfs_page *req, |  | ||||||
| 				struct nfs_commit_info *cinfo) |  | ||||||
| { |  | ||||||
| 	struct pnfs_layout_segment *freeme = NULL; |  | ||||||
| 
 |  | ||||||
| 	if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) |  | ||||||
| 		goto out; |  | ||||||
| 	cinfo->ds->nwritten--; |  | ||||||
| 	if (list_is_singular(&req->wb_list)) { |  | ||||||
| 		struct pnfs_commit_bucket *bucket; |  | ||||||
| 
 |  | ||||||
| 		bucket = list_first_entry(&req->wb_list, |  | ||||||
| 					  struct pnfs_commit_bucket, |  | ||||||
| 					  written); |  | ||||||
| 		freeme = bucket->wlseg; |  | ||||||
| 		bucket->wlseg = NULL; |  | ||||||
| 	} |  | ||||||
| out: |  | ||||||
| 	nfs_request_remove_commit_list(req, cinfo); |  | ||||||
| 	pnfs_put_lseg_locked(freeme); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static void | static void | ||||||
| filelayout_mark_request_commit(struct nfs_page *req, | filelayout_mark_request_commit(struct nfs_page *req, | ||||||
| 			       struct pnfs_layout_segment *lseg, | 			       struct pnfs_layout_segment *lseg, | ||||||
| 			       struct nfs_commit_info *cinfo) | 			       struct nfs_commit_info *cinfo, | ||||||
|  | 			       u32 ds_commit_idx) | ||||||
| 
 | 
 | ||||||
| { | { | ||||||
| 	struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); | 	struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); | ||||||
| @ -1064,7 +985,7 @@ filelayout_mark_request_commit(struct nfs_page *req, | |||||||
| 		 * is normally transferred to the COMMIT call and released | 		 * is normally transferred to the COMMIT call and released | ||||||
| 		 * there.  It could also be released if the last req is pulled | 		 * there.  It could also be released if the last req is pulled | ||||||
| 		 * off due to a rewrite, in which case it will be done in | 		 * off due to a rewrite, in which case it will be done in | ||||||
| 		 * filelayout_clear_request_commit | 		 * pnfs_generic_clear_request_commit | ||||||
| 		 */ | 		 */ | ||||||
| 		buckets[i].wlseg = pnfs_get_lseg(lseg); | 		buckets[i].wlseg = pnfs_get_lseg(lseg); | ||||||
| 	} | 	} | ||||||
| @ -1138,101 +1059,15 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) | |||||||
| 	fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); | 	fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); | ||||||
| 	if (fh) | 	if (fh) | ||||||
| 		data->args.fh = fh; | 		data->args.fh = fh; | ||||||
| 	return nfs_initiate_commit(ds_clnt, data, | 	return nfs_initiate_commit(ds_clnt, data, NFS_PROTO(data->inode), | ||||||
| 				   &filelayout_commit_call_ops, how, | 				   &filelayout_commit_call_ops, how, | ||||||
| 				   RPC_TASK_SOFTCONN); | 				   RPC_TASK_SOFTCONN); | ||||||
| out_err: | out_err: | ||||||
| 	prepare_to_resend_writes(data); | 	pnfs_generic_prepare_to_resend_writes(data); | ||||||
| 	filelayout_commit_release(data); | 	pnfs_generic_commit_release(data); | ||||||
| 	return -EAGAIN; | 	return -EAGAIN; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int |  | ||||||
| transfer_commit_list(struct list_head *src, struct list_head *dst, |  | ||||||
| 		     struct nfs_commit_info *cinfo, int max) |  | ||||||
| { |  | ||||||
| 	struct nfs_page *req, *tmp; |  | ||||||
| 	int ret = 0; |  | ||||||
| 
 |  | ||||||
| 	list_for_each_entry_safe(req, tmp, src, wb_list) { |  | ||||||
| 		if (!nfs_lock_request(req)) |  | ||||||
| 			continue; |  | ||||||
| 		kref_get(&req->wb_kref); |  | ||||||
| 		if (cond_resched_lock(cinfo->lock)) |  | ||||||
| 			list_safe_reset_next(req, tmp, wb_list); |  | ||||||
| 		nfs_request_remove_commit_list(req, cinfo); |  | ||||||
| 		clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); |  | ||||||
| 		nfs_list_add_request(req, dst); |  | ||||||
| 		ret++; |  | ||||||
| 		if ((ret == max) && !cinfo->dreq) |  | ||||||
| 			break; |  | ||||||
| 	} |  | ||||||
| 	return ret; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /* Note called with cinfo->lock held. */ |  | ||||||
| static int |  | ||||||
| filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, |  | ||||||
| 			       struct nfs_commit_info *cinfo, |  | ||||||
| 			       int max) |  | ||||||
| { |  | ||||||
| 	struct list_head *src = &bucket->written; |  | ||||||
| 	struct list_head *dst = &bucket->committing; |  | ||||||
| 	int ret; |  | ||||||
| 
 |  | ||||||
| 	ret = transfer_commit_list(src, dst, cinfo, max); |  | ||||||
| 	if (ret) { |  | ||||||
| 		cinfo->ds->nwritten -= ret; |  | ||||||
| 		cinfo->ds->ncommitting += ret; |  | ||||||
| 		bucket->clseg = bucket->wlseg; |  | ||||||
| 		if (list_empty(src)) |  | ||||||
| 			bucket->wlseg = NULL; |  | ||||||
| 		else |  | ||||||
| 			pnfs_get_lseg(bucket->clseg); |  | ||||||
| 	} |  | ||||||
| 	return ret; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /* Move reqs from written to committing lists, returning count of number moved.
 |  | ||||||
|  * Note called with cinfo->lock held. |  | ||||||
|  */ |  | ||||||
| static int filelayout_scan_commit_lists(struct nfs_commit_info *cinfo, |  | ||||||
| 					int max) |  | ||||||
| { |  | ||||||
| 	int i, rv = 0, cnt; |  | ||||||
| 
 |  | ||||||
| 	for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) { |  | ||||||
| 		cnt = filelayout_scan_ds_commit_list(&cinfo->ds->buckets[i], |  | ||||||
| 						     cinfo, max); |  | ||||||
| 		max -= cnt; |  | ||||||
| 		rv += cnt; |  | ||||||
| 	} |  | ||||||
| 	return rv; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /* Pull everything off the committing lists and dump into @dst */ |  | ||||||
| static void filelayout_recover_commit_reqs(struct list_head *dst, |  | ||||||
| 					   struct nfs_commit_info *cinfo) |  | ||||||
| { |  | ||||||
| 	struct pnfs_commit_bucket *b; |  | ||||||
| 	struct pnfs_layout_segment *freeme; |  | ||||||
| 	int i; |  | ||||||
| 
 |  | ||||||
| restart: |  | ||||||
| 	spin_lock(cinfo->lock); |  | ||||||
| 	for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { |  | ||||||
| 		if (transfer_commit_list(&b->written, dst, cinfo, 0)) { |  | ||||||
| 			freeme = b->wlseg; |  | ||||||
| 			b->wlseg = NULL; |  | ||||||
| 			spin_unlock(cinfo->lock); |  | ||||||
| 			pnfs_put_lseg(freeme); |  | ||||||
| 			goto restart; |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	cinfo->ds->nwritten = 0; |  | ||||||
| 	spin_unlock(cinfo->lock); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /* filelayout_search_commit_reqs - Search lists in @cinfo for the head reqest
 | /* filelayout_search_commit_reqs - Search lists in @cinfo for the head reqest
 | ||||||
|  *				   for @page |  *				   for @page | ||||||
|  * @cinfo - commit info for current inode |  * @cinfo - commit info for current inode | ||||||
| @ -1263,108 +1098,14 @@ filelayout_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page) | |||||||
| 	return NULL; | 	return NULL; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void filelayout_retry_commit(struct nfs_commit_info *cinfo, int idx) |  | ||||||
| { |  | ||||||
| 	struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; |  | ||||||
| 	struct pnfs_commit_bucket *bucket; |  | ||||||
| 	struct pnfs_layout_segment *freeme; |  | ||||||
| 	int i; |  | ||||||
| 
 |  | ||||||
| 	for (i = idx; i < fl_cinfo->nbuckets; i++) { |  | ||||||
| 		bucket = &fl_cinfo->buckets[i]; |  | ||||||
| 		if (list_empty(&bucket->committing)) |  | ||||||
| 			continue; |  | ||||||
| 		nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); |  | ||||||
| 		spin_lock(cinfo->lock); |  | ||||||
| 		freeme = bucket->clseg; |  | ||||||
| 		bucket->clseg = NULL; |  | ||||||
| 		spin_unlock(cinfo->lock); |  | ||||||
| 		pnfs_put_lseg(freeme); |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static unsigned int |  | ||||||
| alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) |  | ||||||
| { |  | ||||||
| 	struct pnfs_ds_commit_info *fl_cinfo; |  | ||||||
| 	struct pnfs_commit_bucket *bucket; |  | ||||||
| 	struct nfs_commit_data *data; |  | ||||||
| 	int i; |  | ||||||
| 	unsigned int nreq = 0; |  | ||||||
| 
 |  | ||||||
| 	fl_cinfo = cinfo->ds; |  | ||||||
| 	bucket = fl_cinfo->buckets; |  | ||||||
| 	for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) { |  | ||||||
| 		if (list_empty(&bucket->committing)) |  | ||||||
| 			continue; |  | ||||||
| 		data = nfs_commitdata_alloc(); |  | ||||||
| 		if (!data) |  | ||||||
| 			break; |  | ||||||
| 		data->ds_commit_index = i; |  | ||||||
| 		spin_lock(cinfo->lock); |  | ||||||
| 		data->lseg = bucket->clseg; |  | ||||||
| 		bucket->clseg = NULL; |  | ||||||
| 		spin_unlock(cinfo->lock); |  | ||||||
| 		list_add(&data->pages, list); |  | ||||||
| 		nreq++; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	/* Clean up on error */ |  | ||||||
| 	filelayout_retry_commit(cinfo, i); |  | ||||||
| 	/* Caller will clean up entries put on list */ |  | ||||||
| 	return nreq; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /* This follows nfs_commit_list pretty closely */ |  | ||||||
| static int | static int | ||||||
| filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, | filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, | ||||||
| 			   int how, struct nfs_commit_info *cinfo) | 			   int how, struct nfs_commit_info *cinfo) | ||||||
| { | { | ||||||
| 	struct nfs_commit_data *data, *tmp; | 	return pnfs_generic_commit_pagelist(inode, mds_pages, how, cinfo, | ||||||
| 	LIST_HEAD(list); | 					    filelayout_initiate_commit); | ||||||
| 	unsigned int nreq = 0; |  | ||||||
| 
 |  | ||||||
| 	if (!list_empty(mds_pages)) { |  | ||||||
| 		data = nfs_commitdata_alloc(); |  | ||||||
| 		if (data != NULL) { |  | ||||||
| 			data->lseg = NULL; |  | ||||||
| 			list_add(&data->pages, &list); |  | ||||||
| 			nreq++; |  | ||||||
| 		} else { |  | ||||||
| 			nfs_retry_commit(mds_pages, NULL, cinfo); |  | ||||||
| 			filelayout_retry_commit(cinfo, 0); |  | ||||||
| 			cinfo->completion_ops->error_cleanup(NFS_I(inode)); |  | ||||||
| 			return -ENOMEM; |  | ||||||
| 		} |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| 	nreq += alloc_ds_commits(cinfo, &list); |  | ||||||
| 
 |  | ||||||
| 	if (nreq == 0) { |  | ||||||
| 		cinfo->completion_ops->error_cleanup(NFS_I(inode)); |  | ||||||
| 		goto out; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	atomic_add(nreq, &cinfo->mds->rpcs_out); |  | ||||||
| 
 |  | ||||||
| 	list_for_each_entry_safe(data, tmp, &list, pages) { |  | ||||||
| 		list_del_init(&data->pages); |  | ||||||
| 		if (!data->lseg) { |  | ||||||
| 			nfs_init_commit(data, mds_pages, NULL, cinfo); |  | ||||||
| 			nfs_initiate_commit(NFS_CLIENT(inode), data, |  | ||||||
| 					    data->mds_ops, how, 0); |  | ||||||
| 		} else { |  | ||||||
| 			struct pnfs_commit_bucket *buckets; |  | ||||||
| 
 |  | ||||||
| 			buckets = cinfo->ds->buckets; |  | ||||||
| 			nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg, cinfo); |  | ||||||
| 			filelayout_initiate_commit(data, how); |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| out: |  | ||||||
| 	cinfo->ds->ncommitting = 0; |  | ||||||
| 	return PNFS_ATTEMPTED; |  | ||||||
| } |  | ||||||
| static struct nfs4_deviceid_node * | static struct nfs4_deviceid_node * | ||||||
| filelayout_alloc_deviceid_node(struct nfs_server *server, | filelayout_alloc_deviceid_node(struct nfs_server *server, | ||||||
| 		struct pnfs_device *pdev, gfp_t gfp_flags) | 		struct pnfs_device *pdev, gfp_t gfp_flags) | ||||||
| @ -1421,9 +1162,9 @@ static struct pnfs_layoutdriver_type filelayout_type = { | |||||||
| 	.pg_write_ops		= &filelayout_pg_write_ops, | 	.pg_write_ops		= &filelayout_pg_write_ops, | ||||||
| 	.get_ds_info		= &filelayout_get_ds_info, | 	.get_ds_info		= &filelayout_get_ds_info, | ||||||
| 	.mark_request_commit	= filelayout_mark_request_commit, | 	.mark_request_commit	= filelayout_mark_request_commit, | ||||||
| 	.clear_request_commit	= filelayout_clear_request_commit, | 	.clear_request_commit	= pnfs_generic_clear_request_commit, | ||||||
| 	.scan_commit_lists	= filelayout_scan_commit_lists, | 	.scan_commit_lists	= pnfs_generic_scan_commit_lists, | ||||||
| 	.recover_commit_reqs	= filelayout_recover_commit_reqs, | 	.recover_commit_reqs	= pnfs_generic_recover_commit_reqs, | ||||||
| 	.search_commit_reqs	= filelayout_search_commit_reqs, | 	.search_commit_reqs	= filelayout_search_commit_reqs, | ||||||
| 	.commit_pagelist	= filelayout_commit_pagelist, | 	.commit_pagelist	= filelayout_commit_pagelist, | ||||||
| 	.read_pagelist		= filelayout_read_pagelist, | 	.read_pagelist		= filelayout_read_pagelist, | ||||||
|  | |||||||
| @ -32,13 +32,6 @@ | |||||||
| 
 | 
 | ||||||
| #include "../pnfs.h" | #include "../pnfs.h" | ||||||
| 
 | 
 | ||||||
| /*
 |  | ||||||
|  * Default data server connection timeout and retrans vaules. |  | ||||||
|  * Set by module paramters dataserver_timeo and dataserver_retrans. |  | ||||||
|  */ |  | ||||||
| #define NFS4_DEF_DS_TIMEO   600 /* in tenths of a second */ |  | ||||||
| #define NFS4_DEF_DS_RETRANS 5 |  | ||||||
| 
 |  | ||||||
| /*
 | /*
 | ||||||
|  * Field testing shows we need to support up to 4096 stripe indices. |  * Field testing shows we need to support up to 4096 stripe indices. | ||||||
|  * We store each index as a u8 (u32 on the wire) to keep the memory footprint |  * We store each index as a u8 (u32 on the wire) to keep the memory footprint | ||||||
| @ -48,32 +41,11 @@ | |||||||
| #define NFS4_PNFS_MAX_STRIPE_CNT 4096 | #define NFS4_PNFS_MAX_STRIPE_CNT 4096 | ||||||
| #define NFS4_PNFS_MAX_MULTI_CNT  256 /* 256 fit into a u8 stripe_index */ | #define NFS4_PNFS_MAX_MULTI_CNT  256 /* 256 fit into a u8 stripe_index */ | ||||||
| 
 | 
 | ||||||
| /* error codes for internal use */ |  | ||||||
| #define NFS4ERR_RESET_TO_MDS   12001 |  | ||||||
| 
 |  | ||||||
| enum stripetype4 { | enum stripetype4 { | ||||||
| 	STRIPE_SPARSE = 1, | 	STRIPE_SPARSE = 1, | ||||||
| 	STRIPE_DENSE = 2 | 	STRIPE_DENSE = 2 | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| /* Individual ip address */ |  | ||||||
| struct nfs4_pnfs_ds_addr { |  | ||||||
| 	struct sockaddr_storage	da_addr; |  | ||||||
| 	size_t			da_addrlen; |  | ||||||
| 	struct list_head	da_node;  /* nfs4_pnfs_dev_hlist dev_dslist */ |  | ||||||
| 	char			*da_remotestr;	/* human readable addr+port */ |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| struct nfs4_pnfs_ds { |  | ||||||
| 	struct list_head	ds_node;  /* nfs4_pnfs_dev_hlist dev_dslist */ |  | ||||||
| 	char			*ds_remotestr;	/* comma sep list of addrs */ |  | ||||||
| 	struct list_head	ds_addrs; |  | ||||||
| 	struct nfs_client	*ds_clp; |  | ||||||
| 	atomic_t		ds_count; |  | ||||||
| 	unsigned long		ds_state; |  | ||||||
| #define NFS4DS_CONNECTING	0	/* ds is establishing connection */ |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| struct nfs4_file_layout_dsaddr { | struct nfs4_file_layout_dsaddr { | ||||||
| 	struct nfs4_deviceid_node	id_node; | 	struct nfs4_deviceid_node	id_node; | ||||||
| 	u32				stripe_count; | 	u32				stripe_count; | ||||||
| @ -119,17 +91,6 @@ FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg) | |||||||
| 	return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node; | 	return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline void |  | ||||||
| filelayout_mark_devid_invalid(struct nfs4_deviceid_node *node) |  | ||||||
| { |  | ||||||
| 	u32 *p = (u32 *)&node->deviceid; |  | ||||||
| 
 |  | ||||||
| 	printk(KERN_WARNING "NFS: Deviceid [%x%x%x%x] marked out of use.\n", |  | ||||||
| 		p[0], p[1], p[2], p[3]); |  | ||||||
| 
 |  | ||||||
| 	set_bit(NFS_DEVICEID_INVALID, &node->flags); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static inline bool | static inline bool | ||||||
| filelayout_test_devid_invalid(struct nfs4_deviceid_node *node) | filelayout_test_devid_invalid(struct nfs4_deviceid_node *node) | ||||||
| { | { | ||||||
| @ -142,7 +103,6 @@ filelayout_test_devid_unavailable(struct nfs4_deviceid_node *node); | |||||||
| extern struct nfs_fh * | extern struct nfs_fh * | ||||||
| nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); | nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); | ||||||
| 
 | 
 | ||||||
| extern void print_ds(struct nfs4_pnfs_ds *ds); |  | ||||||
| u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset); | u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset); | ||||||
| u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j); | u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j); | ||||||
| struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, | struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, | ||||||
|  | |||||||
| @ -31,7 +31,6 @@ | |||||||
| #include <linux/nfs_fs.h> | #include <linux/nfs_fs.h> | ||||||
| #include <linux/vmalloc.h> | #include <linux/vmalloc.h> | ||||||
| #include <linux/module.h> | #include <linux/module.h> | ||||||
| #include <linux/sunrpc/addr.h> |  | ||||||
| 
 | 
 | ||||||
| #include "../internal.h" | #include "../internal.h" | ||||||
| #include "../nfs4session.h" | #include "../nfs4session.h" | ||||||
| @ -42,183 +41,6 @@ | |||||||
| static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO; | static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO; | ||||||
| static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS; | static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS; | ||||||
| 
 | 
 | ||||||
| /*
 |  | ||||||
|  * Data server cache |  | ||||||
|  * |  | ||||||
|  * Data servers can be mapped to different device ids. |  | ||||||
|  * nfs4_pnfs_ds reference counting |  | ||||||
|  *   - set to 1 on allocation |  | ||||||
|  *   - incremented when a device id maps a data server already in the cache. |  | ||||||
|  *   - decremented when deviceid is removed from the cache. |  | ||||||
|  */ |  | ||||||
| static DEFINE_SPINLOCK(nfs4_ds_cache_lock); |  | ||||||
| static LIST_HEAD(nfs4_data_server_cache); |  | ||||||
| 
 |  | ||||||
| /* Debug routines */ |  | ||||||
| void |  | ||||||
| print_ds(struct nfs4_pnfs_ds *ds) |  | ||||||
| { |  | ||||||
| 	if (ds == NULL) { |  | ||||||
| 		printk("%s NULL device\n", __func__); |  | ||||||
| 		return; |  | ||||||
| 	} |  | ||||||
| 	printk("        ds %s\n" |  | ||||||
| 		"        ref count %d\n" |  | ||||||
| 		"        client %p\n" |  | ||||||
| 		"        cl_exchange_flags %x\n", |  | ||||||
| 		ds->ds_remotestr, |  | ||||||
| 		atomic_read(&ds->ds_count), ds->ds_clp, |  | ||||||
| 		ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static bool |  | ||||||
| same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) |  | ||||||
| { |  | ||||||
| 	struct sockaddr_in *a, *b; |  | ||||||
| 	struct sockaddr_in6 *a6, *b6; |  | ||||||
| 
 |  | ||||||
| 	if (addr1->sa_family != addr2->sa_family) |  | ||||||
| 		return false; |  | ||||||
| 
 |  | ||||||
| 	switch (addr1->sa_family) { |  | ||||||
| 	case AF_INET: |  | ||||||
| 		a = (struct sockaddr_in *)addr1; |  | ||||||
| 		b = (struct sockaddr_in *)addr2; |  | ||||||
| 
 |  | ||||||
| 		if (a->sin_addr.s_addr == b->sin_addr.s_addr && |  | ||||||
| 		    a->sin_port == b->sin_port) |  | ||||||
| 			return true; |  | ||||||
| 		break; |  | ||||||
| 
 |  | ||||||
| 	case AF_INET6: |  | ||||||
| 		a6 = (struct sockaddr_in6 *)addr1; |  | ||||||
| 		b6 = (struct sockaddr_in6 *)addr2; |  | ||||||
| 
 |  | ||||||
| 		/* LINKLOCAL addresses must have matching scope_id */ |  | ||||||
| 		if (ipv6_addr_src_scope(&a6->sin6_addr) == |  | ||||||
| 		    IPV6_ADDR_SCOPE_LINKLOCAL && |  | ||||||
| 		    a6->sin6_scope_id != b6->sin6_scope_id) |  | ||||||
| 			return false; |  | ||||||
| 
 |  | ||||||
| 		if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) && |  | ||||||
| 		    a6->sin6_port == b6->sin6_port) |  | ||||||
| 			return true; |  | ||||||
| 		break; |  | ||||||
| 
 |  | ||||||
| 	default: |  | ||||||
| 		dprintk("%s: unhandled address family: %u\n", |  | ||||||
| 			__func__, addr1->sa_family); |  | ||||||
| 		return false; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	return false; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static bool |  | ||||||
| _same_data_server_addrs_locked(const struct list_head *dsaddrs1, |  | ||||||
| 			       const struct list_head *dsaddrs2) |  | ||||||
| { |  | ||||||
| 	struct nfs4_pnfs_ds_addr *da1, *da2; |  | ||||||
| 
 |  | ||||||
| 	/* step through both lists, comparing as we go */ |  | ||||||
| 	for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node), |  | ||||||
| 	     da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node); |  | ||||||
| 	     da1 != NULL && da2 != NULL; |  | ||||||
| 	     da1 = list_entry(da1->da_node.next, typeof(*da1), da_node), |  | ||||||
| 	     da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) { |  | ||||||
| 		if (!same_sockaddr((struct sockaddr *)&da1->da_addr, |  | ||||||
| 				   (struct sockaddr *)&da2->da_addr)) |  | ||||||
| 			return false; |  | ||||||
| 	} |  | ||||||
| 	if (da1 == NULL && da2 == NULL) |  | ||||||
| 		return true; |  | ||||||
| 
 |  | ||||||
| 	return false; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * Lookup DS by addresses.  nfs4_ds_cache_lock is held |  | ||||||
|  */ |  | ||||||
| static struct nfs4_pnfs_ds * |  | ||||||
| _data_server_lookup_locked(const struct list_head *dsaddrs) |  | ||||||
| { |  | ||||||
| 	struct nfs4_pnfs_ds *ds; |  | ||||||
| 
 |  | ||||||
| 	list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) |  | ||||||
| 		if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs)) |  | ||||||
| 			return ds; |  | ||||||
| 	return NULL; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * Create an rpc connection to the nfs4_pnfs_ds data server |  | ||||||
|  * Currently only supports IPv4 and IPv6 addresses |  | ||||||
|  */ |  | ||||||
| static int |  | ||||||
| nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) |  | ||||||
| { |  | ||||||
| 	struct nfs_client *clp = ERR_PTR(-EIO); |  | ||||||
| 	struct nfs4_pnfs_ds_addr *da; |  | ||||||
| 	int status = 0; |  | ||||||
| 
 |  | ||||||
| 	dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr, |  | ||||||
| 		mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); |  | ||||||
| 
 |  | ||||||
| 	list_for_each_entry(da, &ds->ds_addrs, da_node) { |  | ||||||
| 		dprintk("%s: DS %s: trying address %s\n", |  | ||||||
| 			__func__, ds->ds_remotestr, da->da_remotestr); |  | ||||||
| 
 |  | ||||||
| 		clp = nfs4_set_ds_client(mds_srv->nfs_client, |  | ||||||
| 					(struct sockaddr *)&da->da_addr, |  | ||||||
| 					da->da_addrlen, IPPROTO_TCP, |  | ||||||
| 					dataserver_timeo, dataserver_retrans); |  | ||||||
| 		if (!IS_ERR(clp)) |  | ||||||
| 			break; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	if (IS_ERR(clp)) { |  | ||||||
| 		status = PTR_ERR(clp); |  | ||||||
| 		goto out; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time); |  | ||||||
| 	if (status) |  | ||||||
| 		goto out_put; |  | ||||||
| 
 |  | ||||||
| 	smp_wmb(); |  | ||||||
| 	ds->ds_clp = clp; |  | ||||||
| 	dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); |  | ||||||
| out: |  | ||||||
| 	return status; |  | ||||||
| out_put: |  | ||||||
| 	nfs_put_client(clp); |  | ||||||
| 	goto out; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static void |  | ||||||
| destroy_ds(struct nfs4_pnfs_ds *ds) |  | ||||||
| { |  | ||||||
| 	struct nfs4_pnfs_ds_addr *da; |  | ||||||
| 
 |  | ||||||
| 	dprintk("--> %s\n", __func__); |  | ||||||
| 	ifdebug(FACILITY) |  | ||||||
| 		print_ds(ds); |  | ||||||
| 
 |  | ||||||
| 	nfs_put_client(ds->ds_clp); |  | ||||||
| 
 |  | ||||||
| 	while (!list_empty(&ds->ds_addrs)) { |  | ||||||
| 		da = list_first_entry(&ds->ds_addrs, |  | ||||||
| 				      struct nfs4_pnfs_ds_addr, |  | ||||||
| 				      da_node); |  | ||||||
| 		list_del_init(&da->da_node); |  | ||||||
| 		kfree(da->da_remotestr); |  | ||||||
| 		kfree(da); |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	kfree(ds->ds_remotestr); |  | ||||||
| 	kfree(ds); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void | void | ||||||
| nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) | nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) | ||||||
| { | { | ||||||
| @ -229,259 +51,13 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) | |||||||
| 
 | 
 | ||||||
| 	for (i = 0; i < dsaddr->ds_num; i++) { | 	for (i = 0; i < dsaddr->ds_num; i++) { | ||||||
| 		ds = dsaddr->ds_list[i]; | 		ds = dsaddr->ds_list[i]; | ||||||
| 		if (ds != NULL) { | 		if (ds != NULL) | ||||||
| 			if (atomic_dec_and_lock(&ds->ds_count, | 			nfs4_pnfs_ds_put(ds); | ||||||
| 						&nfs4_ds_cache_lock)) { |  | ||||||
| 				list_del_init(&ds->ds_node); |  | ||||||
| 				spin_unlock(&nfs4_ds_cache_lock); |  | ||||||
| 				destroy_ds(ds); |  | ||||||
| 			} |  | ||||||
| 		} |  | ||||||
| 	} | 	} | ||||||
| 	kfree(dsaddr->stripe_indices); | 	kfree(dsaddr->stripe_indices); | ||||||
| 	kfree(dsaddr); | 	kfree(dsaddr); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /*
 |  | ||||||
|  * Create a string with a human readable address and port to avoid |  | ||||||
|  * complicated setup around many dprinks. |  | ||||||
|  */ |  | ||||||
| static char * |  | ||||||
| nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags) |  | ||||||
| { |  | ||||||
| 	struct nfs4_pnfs_ds_addr *da; |  | ||||||
| 	char *remotestr; |  | ||||||
| 	size_t len; |  | ||||||
| 	char *p; |  | ||||||
| 
 |  | ||||||
| 	len = 3;        /* '{', '}' and eol */ |  | ||||||
| 	list_for_each_entry(da, dsaddrs, da_node) { |  | ||||||
| 		len += strlen(da->da_remotestr) + 1;    /* string plus comma */ |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	remotestr = kzalloc(len, gfp_flags); |  | ||||||
| 	if (!remotestr) |  | ||||||
| 		return NULL; |  | ||||||
| 
 |  | ||||||
| 	p = remotestr; |  | ||||||
| 	*(p++) = '{'; |  | ||||||
| 	len--; |  | ||||||
| 	list_for_each_entry(da, dsaddrs, da_node) { |  | ||||||
| 		size_t ll = strlen(da->da_remotestr); |  | ||||||
| 
 |  | ||||||
| 		if (ll > len) |  | ||||||
| 			goto out_err; |  | ||||||
| 
 |  | ||||||
| 		memcpy(p, da->da_remotestr, ll); |  | ||||||
| 		p += ll; |  | ||||||
| 		len -= ll; |  | ||||||
| 
 |  | ||||||
| 		if (len < 1) |  | ||||||
| 			goto out_err; |  | ||||||
| 		(*p++) = ','; |  | ||||||
| 		len--; |  | ||||||
| 	} |  | ||||||
| 	if (len < 2) |  | ||||||
| 		goto out_err; |  | ||||||
| 	*(p++) = '}'; |  | ||||||
| 	*p = '\0'; |  | ||||||
| 	return remotestr; |  | ||||||
| out_err: |  | ||||||
| 	kfree(remotestr); |  | ||||||
| 	return NULL; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static struct nfs4_pnfs_ds * |  | ||||||
| nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) |  | ||||||
| { |  | ||||||
| 	struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; |  | ||||||
| 	char *remotestr; |  | ||||||
| 
 |  | ||||||
| 	if (list_empty(dsaddrs)) { |  | ||||||
| 		dprintk("%s: no addresses defined\n", __func__); |  | ||||||
| 		goto out; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	ds = kzalloc(sizeof(*ds), gfp_flags); |  | ||||||
| 	if (!ds) |  | ||||||
| 		goto out; |  | ||||||
| 
 |  | ||||||
| 	/* this is only used for debugging, so it's ok if its NULL */ |  | ||||||
| 	remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags); |  | ||||||
| 
 |  | ||||||
| 	spin_lock(&nfs4_ds_cache_lock); |  | ||||||
| 	tmp_ds = _data_server_lookup_locked(dsaddrs); |  | ||||||
| 	if (tmp_ds == NULL) { |  | ||||||
| 		INIT_LIST_HEAD(&ds->ds_addrs); |  | ||||||
| 		list_splice_init(dsaddrs, &ds->ds_addrs); |  | ||||||
| 		ds->ds_remotestr = remotestr; |  | ||||||
| 		atomic_set(&ds->ds_count, 1); |  | ||||||
| 		INIT_LIST_HEAD(&ds->ds_node); |  | ||||||
| 		ds->ds_clp = NULL; |  | ||||||
| 		list_add(&ds->ds_node, &nfs4_data_server_cache); |  | ||||||
| 		dprintk("%s add new data server %s\n", __func__, |  | ||||||
| 			ds->ds_remotestr); |  | ||||||
| 	} else { |  | ||||||
| 		kfree(remotestr); |  | ||||||
| 		kfree(ds); |  | ||||||
| 		atomic_inc(&tmp_ds->ds_count); |  | ||||||
| 		dprintk("%s data server %s found, inc'ed ds_count to %d\n", |  | ||||||
| 			__func__, tmp_ds->ds_remotestr, |  | ||||||
| 			atomic_read(&tmp_ds->ds_count)); |  | ||||||
| 		ds = tmp_ds; |  | ||||||
| 	} |  | ||||||
| 	spin_unlock(&nfs4_ds_cache_lock); |  | ||||||
| out: |  | ||||||
| 	return ds; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * Currently only supports ipv4, ipv6 and one multi-path address. |  | ||||||
|  */ |  | ||||||
| static struct nfs4_pnfs_ds_addr * |  | ||||||
| decode_ds_addr(struct net *net, struct xdr_stream *streamp, gfp_t gfp_flags) |  | ||||||
| { |  | ||||||
| 	struct nfs4_pnfs_ds_addr *da = NULL; |  | ||||||
| 	char *buf, *portstr; |  | ||||||
| 	__be16 port; |  | ||||||
| 	int nlen, rlen; |  | ||||||
| 	int tmp[2]; |  | ||||||
| 	__be32 *p; |  | ||||||
| 	char *netid, *match_netid; |  | ||||||
| 	size_t len, match_netid_len; |  | ||||||
| 	char *startsep = ""; |  | ||||||
| 	char *endsep = ""; |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| 	/* r_netid */ |  | ||||||
| 	p = xdr_inline_decode(streamp, 4); |  | ||||||
| 	if (unlikely(!p)) |  | ||||||
| 		goto out_err; |  | ||||||
| 	nlen = be32_to_cpup(p++); |  | ||||||
| 
 |  | ||||||
| 	p = xdr_inline_decode(streamp, nlen); |  | ||||||
| 	if (unlikely(!p)) |  | ||||||
| 		goto out_err; |  | ||||||
| 
 |  | ||||||
| 	netid = kmalloc(nlen+1, gfp_flags); |  | ||||||
| 	if (unlikely(!netid)) |  | ||||||
| 		goto out_err; |  | ||||||
| 
 |  | ||||||
| 	netid[nlen] = '\0'; |  | ||||||
| 	memcpy(netid, p, nlen); |  | ||||||
| 
 |  | ||||||
| 	/* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */ |  | ||||||
| 	p = xdr_inline_decode(streamp, 4); |  | ||||||
| 	if (unlikely(!p)) |  | ||||||
| 		goto out_free_netid; |  | ||||||
| 	rlen = be32_to_cpup(p); |  | ||||||
| 
 |  | ||||||
| 	p = xdr_inline_decode(streamp, rlen); |  | ||||||
| 	if (unlikely(!p)) |  | ||||||
| 		goto out_free_netid; |  | ||||||
| 
 |  | ||||||
| 	/* port is ".ABC.DEF", 8 chars max */ |  | ||||||
| 	if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) { |  | ||||||
| 		dprintk("%s: Invalid address, length %d\n", __func__, |  | ||||||
| 			rlen); |  | ||||||
| 		goto out_free_netid; |  | ||||||
| 	} |  | ||||||
| 	buf = kmalloc(rlen + 1, gfp_flags); |  | ||||||
| 	if (!buf) { |  | ||||||
| 		dprintk("%s: Not enough memory\n", __func__); |  | ||||||
| 		goto out_free_netid; |  | ||||||
| 	} |  | ||||||
| 	buf[rlen] = '\0'; |  | ||||||
| 	memcpy(buf, p, rlen); |  | ||||||
| 
 |  | ||||||
| 	/* replace port '.' with '-' */ |  | ||||||
| 	portstr = strrchr(buf, '.'); |  | ||||||
| 	if (!portstr) { |  | ||||||
| 		dprintk("%s: Failed finding expected dot in port\n", |  | ||||||
| 			__func__); |  | ||||||
| 		goto out_free_buf; |  | ||||||
| 	} |  | ||||||
| 	*portstr = '-'; |  | ||||||
| 
 |  | ||||||
| 	/* find '.' between address and port */ |  | ||||||
| 	portstr = strrchr(buf, '.'); |  | ||||||
| 	if (!portstr) { |  | ||||||
| 		dprintk("%s: Failed finding expected dot between address and " |  | ||||||
| 			"port\n", __func__); |  | ||||||
| 		goto out_free_buf; |  | ||||||
| 	} |  | ||||||
| 	*portstr = '\0'; |  | ||||||
| 
 |  | ||||||
| 	da = kzalloc(sizeof(*da), gfp_flags); |  | ||||||
| 	if (unlikely(!da)) |  | ||||||
| 		goto out_free_buf; |  | ||||||
| 
 |  | ||||||
| 	INIT_LIST_HEAD(&da->da_node); |  | ||||||
| 
 |  | ||||||
| 	if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr, |  | ||||||
| 		      sizeof(da->da_addr))) { |  | ||||||
| 		dprintk("%s: error parsing address %s\n", __func__, buf); |  | ||||||
| 		goto out_free_da; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	portstr++; |  | ||||||
| 	sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]); |  | ||||||
| 	port = htons((tmp[0] << 8) | (tmp[1])); |  | ||||||
| 
 |  | ||||||
| 	switch (da->da_addr.ss_family) { |  | ||||||
| 	case AF_INET: |  | ||||||
| 		((struct sockaddr_in *)&da->da_addr)->sin_port = port; |  | ||||||
| 		da->da_addrlen = sizeof(struct sockaddr_in); |  | ||||||
| 		match_netid = "tcp"; |  | ||||||
| 		match_netid_len = 3; |  | ||||||
| 		break; |  | ||||||
| 
 |  | ||||||
| 	case AF_INET6: |  | ||||||
| 		((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port; |  | ||||||
| 		da->da_addrlen = sizeof(struct sockaddr_in6); |  | ||||||
| 		match_netid = "tcp6"; |  | ||||||
| 		match_netid_len = 4; |  | ||||||
| 		startsep = "["; |  | ||||||
| 		endsep = "]"; |  | ||||||
| 		break; |  | ||||||
| 
 |  | ||||||
| 	default: |  | ||||||
| 		dprintk("%s: unsupported address family: %u\n", |  | ||||||
| 			__func__, da->da_addr.ss_family); |  | ||||||
| 		goto out_free_da; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) { |  | ||||||
| 		dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n", |  | ||||||
| 			__func__, netid, match_netid); |  | ||||||
| 		goto out_free_da; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	/* save human readable address */ |  | ||||||
| 	len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7; |  | ||||||
| 	da->da_remotestr = kzalloc(len, gfp_flags); |  | ||||||
| 
 |  | ||||||
| 	/* NULL is ok, only used for dprintk */ |  | ||||||
| 	if (da->da_remotestr) |  | ||||||
| 		snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep, |  | ||||||
| 			 buf, endsep, ntohs(port)); |  | ||||||
| 
 |  | ||||||
| 	dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr); |  | ||||||
| 	kfree(buf); |  | ||||||
| 	kfree(netid); |  | ||||||
| 	return da; |  | ||||||
| 
 |  | ||||||
| out_free_da: |  | ||||||
| 	kfree(da); |  | ||||||
| out_free_buf: |  | ||||||
| 	dprintk("%s: Error parsing DS addr: %s\n", __func__, buf); |  | ||||||
| 	kfree(buf); |  | ||||||
| out_free_netid: |  | ||||||
| 	kfree(netid); |  | ||||||
| out_err: |  | ||||||
| 	return NULL; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /* Decode opaque device data and return the result */ | /* Decode opaque device data and return the result */ | ||||||
| struct nfs4_file_layout_dsaddr * | struct nfs4_file_layout_dsaddr * | ||||||
| nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, | nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, | ||||||
| @ -584,7 +160,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, | |||||||
| 
 | 
 | ||||||
| 		mp_count = be32_to_cpup(p); /* multipath count */ | 		mp_count = be32_to_cpup(p); /* multipath count */ | ||||||
| 		for (j = 0; j < mp_count; j++) { | 		for (j = 0; j < mp_count; j++) { | ||||||
| 			da = decode_ds_addr(server->nfs_client->cl_net, | 			da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net, | ||||||
| 						    &stream, gfp_flags); | 						    &stream, gfp_flags); | ||||||
| 			if (da) | 			if (da) | ||||||
| 				list_add_tail(&da->da_node, &dsaddrs); | 				list_add_tail(&da->da_node, &dsaddrs); | ||||||
| @ -681,22 +257,7 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j) | |||||||
| 	return flseg->fh_array[i]; | 	return flseg->fh_array[i]; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds) | /* Upon return, either ds is connected, or ds is NULL */ | ||||||
| { |  | ||||||
| 	might_sleep(); |  | ||||||
| 	wait_on_bit_action(&ds->ds_state, NFS4DS_CONNECTING, |  | ||||||
| 			   nfs_wait_bit_killable, TASK_KILLABLE); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds) |  | ||||||
| { |  | ||||||
| 	smp_mb__before_atomic(); |  | ||||||
| 	clear_bit(NFS4DS_CONNECTING, &ds->ds_state); |  | ||||||
| 	smp_mb__after_atomic(); |  | ||||||
| 	wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| struct nfs4_pnfs_ds * | struct nfs4_pnfs_ds * | ||||||
| nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) | nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) | ||||||
| { | { | ||||||
| @ -704,29 +265,23 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) | |||||||
| 	struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; | 	struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; | ||||||
| 	struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); | 	struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); | ||||||
| 	struct nfs4_pnfs_ds *ret = ds; | 	struct nfs4_pnfs_ds *ret = ds; | ||||||
|  | 	struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); | ||||||
| 
 | 
 | ||||||
| 	if (ds == NULL) { | 	if (ds == NULL) { | ||||||
| 		printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", | 		printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", | ||||||
| 			__func__, ds_idx); | 			__func__, ds_idx); | ||||||
| 		filelayout_mark_devid_invalid(devid); | 		pnfs_generic_mark_devid_invalid(devid); | ||||||
| 		goto out; | 		goto out; | ||||||
| 	} | 	} | ||||||
| 	smp_rmb(); | 	smp_rmb(); | ||||||
| 	if (ds->ds_clp) | 	if (ds->ds_clp) | ||||||
| 		goto out_test_devid; | 		goto out_test_devid; | ||||||
| 
 | 
 | ||||||
| 	if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { | 	nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, | ||||||
| 		struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); | 			     dataserver_retrans, 4, | ||||||
| 		int err; | 			     s->nfs_client->cl_minorversion, | ||||||
|  | 			     s->nfs_client->cl_rpcclient->cl_auth->au_flavor); | ||||||
| 
 | 
 | ||||||
| 		err = nfs4_ds_connect(s, ds); |  | ||||||
| 		if (err) |  | ||||||
| 			nfs4_mark_deviceid_unavailable(devid); |  | ||||||
| 		nfs4_clear_ds_conn_bit(ds); |  | ||||||
| 	} else { |  | ||||||
| 		/* Either ds is connected, or ds is NULL */ |  | ||||||
| 		nfs4_wait_ds_connect(ds); |  | ||||||
| 	} |  | ||||||
| out_test_devid: | out_test_devid: | ||||||
| 	if (filelayout_test_devid_unavailable(devid)) | 	if (filelayout_test_devid_unavailable(devid)) | ||||||
| 		ret = NULL; | 		ret = NULL; | ||||||
|  | |||||||
							
								
								
									
										5
									
								
								fs/nfs/flexfilelayout/Makefile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								fs/nfs/flexfilelayout/Makefile
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,5 @@ | |||||||
|  | #
 | ||||||
|  | # Makefile for the pNFS Flexfile Layout Driver kernel module
 | ||||||
|  | #
 | ||||||
|  | obj-$(CONFIG_PNFS_FLEXFILE_LAYOUT) += nfs_layout_flexfiles.o | ||||||
|  | nfs_layout_flexfiles-y := flexfilelayout.o flexfilelayoutdev.o | ||||||
							
								
								
									
										1574
									
								
								fs/nfs/flexfilelayout/flexfilelayout.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1574
									
								
								fs/nfs/flexfilelayout/flexfilelayout.c
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										155
									
								
								fs/nfs/flexfilelayout/flexfilelayout.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										155
									
								
								fs/nfs/flexfilelayout/flexfilelayout.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,155 @@ | |||||||
|  | /*
 | ||||||
|  |  * NFSv4 flexfile layout driver data structures. | ||||||
|  |  * | ||||||
|  |  * Copyright (c) 2014, Primary Data, Inc. All rights reserved. | ||||||
|  |  * | ||||||
|  |  * Tao Peng <bergwolf@primarydata.com> | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | #ifndef FS_NFS_NFS4FLEXFILELAYOUT_H | ||||||
|  | #define FS_NFS_NFS4FLEXFILELAYOUT_H | ||||||
|  | 
 | ||||||
|  | #include "../pnfs.h" | ||||||
|  | 
 | ||||||
|  | /* XXX: Let's filter out insanely large mirror count for now to avoid oom
 | ||||||
|  |  * due to network error etc. */ | ||||||
|  | #define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096 | ||||||
|  | 
 | ||||||
|  | struct nfs4_ff_ds_version { | ||||||
|  | 	u32				version; | ||||||
|  | 	u32				minor_version; | ||||||
|  | 	u32				rsize; | ||||||
|  | 	u32				wsize; | ||||||
|  | 	bool				tightly_coupled; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | /* chained in global deviceid hlist */ | ||||||
|  | struct nfs4_ff_layout_ds { | ||||||
|  | 	struct nfs4_deviceid_node	id_node; | ||||||
|  | 	u32				ds_versions_cnt; | ||||||
|  | 	struct nfs4_ff_ds_version	*ds_versions; | ||||||
|  | 	struct nfs4_pnfs_ds		*ds; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | struct nfs4_ff_layout_ds_err { | ||||||
|  | 	struct list_head		list; /* linked in mirror error_list */ | ||||||
|  | 	u64				offset; | ||||||
|  | 	u64				length; | ||||||
|  | 	int				status; | ||||||
|  | 	enum nfs_opnum4			opnum; | ||||||
|  | 	nfs4_stateid			stateid; | ||||||
|  | 	struct nfs4_deviceid		deviceid; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | struct nfs4_ff_layout_mirror { | ||||||
|  | 	u32				ds_count; | ||||||
|  | 	u32				efficiency; | ||||||
|  | 	struct nfs4_ff_layout_ds	*mirror_ds; | ||||||
|  | 	u32				fh_versions_cnt; | ||||||
|  | 	struct nfs_fh			*fh_versions; | ||||||
|  | 	nfs4_stateid			stateid; | ||||||
|  | 	struct nfs4_string		user_name; | ||||||
|  | 	struct nfs4_string		group_name; | ||||||
|  | 	u32				uid; | ||||||
|  | 	u32				gid; | ||||||
|  | 	struct rpc_cred			*cred; | ||||||
|  | 	spinlock_t			lock; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | struct nfs4_ff_layout_segment { | ||||||
|  | 	struct pnfs_layout_segment	generic_hdr; | ||||||
|  | 	u64				stripe_unit; | ||||||
|  | 	u32				mirror_array_cnt; | ||||||
|  | 	struct nfs4_ff_layout_mirror	**mirror_array; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | struct nfs4_flexfile_layout { | ||||||
|  | 	struct pnfs_layout_hdr generic_hdr; | ||||||
|  | 	struct pnfs_ds_commit_info commit_info; | ||||||
|  | 	struct list_head	error_list; /* nfs4_ff_layout_ds_err */ | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | static inline struct nfs4_flexfile_layout * | ||||||
|  | FF_LAYOUT_FROM_HDR(struct pnfs_layout_hdr *lo) | ||||||
|  | { | ||||||
|  | 	return container_of(lo, struct nfs4_flexfile_layout, generic_hdr); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline struct nfs4_ff_layout_segment * | ||||||
|  | FF_LAYOUT_LSEG(struct pnfs_layout_segment *lseg) | ||||||
|  | { | ||||||
|  | 	return container_of(lseg, | ||||||
|  | 			    struct nfs4_ff_layout_segment, | ||||||
|  | 			    generic_hdr); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline struct nfs4_deviceid_node * | ||||||
|  | FF_LAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg, u32 idx) | ||||||
|  | { | ||||||
|  | 	if (idx >= FF_LAYOUT_LSEG(lseg)->mirror_array_cnt || | ||||||
|  | 	    FF_LAYOUT_LSEG(lseg)->mirror_array[idx] == NULL || | ||||||
|  | 	    FF_LAYOUT_LSEG(lseg)->mirror_array[idx]->mirror_ds == NULL) | ||||||
|  | 		return NULL; | ||||||
|  | 	return &FF_LAYOUT_LSEG(lseg)->mirror_array[idx]->mirror_ds->id_node; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline struct nfs4_ff_layout_ds * | ||||||
|  | FF_LAYOUT_MIRROR_DS(struct nfs4_deviceid_node *node) | ||||||
|  | { | ||||||
|  | 	return container_of(node, struct nfs4_ff_layout_ds, id_node); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline struct nfs4_ff_layout_mirror * | ||||||
|  | FF_LAYOUT_COMP(struct pnfs_layout_segment *lseg, u32 idx) | ||||||
|  | { | ||||||
|  | 	if (idx >= FF_LAYOUT_LSEG(lseg)->mirror_array_cnt) | ||||||
|  | 		return NULL; | ||||||
|  | 	return FF_LAYOUT_LSEG(lseg)->mirror_array[idx]; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline u32 | ||||||
|  | FF_LAYOUT_MIRROR_COUNT(struct pnfs_layout_segment *lseg) | ||||||
|  | { | ||||||
|  | 	return FF_LAYOUT_LSEG(lseg)->mirror_array_cnt; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline bool | ||||||
|  | ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node) | ||||||
|  | { | ||||||
|  | 	return nfs4_test_deviceid_unavailable(node); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline int | ||||||
|  | nfs4_ff_layout_ds_version(struct pnfs_layout_segment *lseg, u32 ds_idx) | ||||||
|  | { | ||||||
|  | 	return FF_LAYOUT_COMP(lseg, ds_idx)->mirror_ds->ds_versions[0].version; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | struct nfs4_ff_layout_ds * | ||||||
|  | nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, | ||||||
|  | 			    gfp_t gfp_flags); | ||||||
|  | void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds); | ||||||
|  | void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds); | ||||||
|  | int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, | ||||||
|  | 			     struct nfs4_ff_layout_mirror *mirror, u64 offset, | ||||||
|  | 			     u64 length, int status, enum nfs_opnum4 opnum, | ||||||
|  | 			     gfp_t gfp_flags); | ||||||
|  | int ff_layout_encode_ds_ioerr(struct nfs4_flexfile_layout *flo, | ||||||
|  | 			      struct xdr_stream *xdr, int *count, | ||||||
|  | 			      const struct pnfs_layout_range *range); | ||||||
|  | struct nfs_fh * | ||||||
|  | nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx); | ||||||
|  | 
 | ||||||
|  | struct nfs4_pnfs_ds * | ||||||
|  | nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, | ||||||
|  | 			  bool fail_return); | ||||||
|  | 
 | ||||||
|  | struct rpc_clnt * | ||||||
|  | nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg, | ||||||
|  | 				 u32 ds_idx, | ||||||
|  | 				 struct nfs_client *ds_clp, | ||||||
|  | 				 struct inode *inode); | ||||||
|  | struct rpc_cred *ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, | ||||||
|  | 				       u32 ds_idx, struct rpc_cred *mdscred); | ||||||
|  | bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg); | ||||||
|  | #endif /* FS_NFS_NFS4FLEXFILELAYOUT_H */ | ||||||
							
								
								
									
										552
									
								
								fs/nfs/flexfilelayout/flexfilelayoutdev.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										552
									
								
								fs/nfs/flexfilelayout/flexfilelayoutdev.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,552 @@ | |||||||
|  | /*
 | ||||||
|  |  * Device operations for the pnfs nfs4 flexfile layout driver. | ||||||
|  |  * | ||||||
|  |  * Copyright (c) 2014, Primary Data, Inc. All rights reserved. | ||||||
|  |  * | ||||||
|  |  * Tao Peng <bergwolf@primarydata.com> | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | #include <linux/nfs_fs.h> | ||||||
|  | #include <linux/vmalloc.h> | ||||||
|  | #include <linux/module.h> | ||||||
|  | #include <linux/sunrpc/addr.h> | ||||||
|  | 
 | ||||||
|  | #include "../internal.h" | ||||||
|  | #include "../nfs4session.h" | ||||||
|  | #include "flexfilelayout.h" | ||||||
|  | 
 | ||||||
|  | #define NFSDBG_FACILITY		NFSDBG_PNFS_LD | ||||||
|  | 
 | ||||||
|  | static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO; | ||||||
|  | static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS; | ||||||
|  | 
 | ||||||
|  | void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds) | ||||||
|  | { | ||||||
|  | 	if (mirror_ds) | ||||||
|  | 		nfs4_put_deviceid_node(&mirror_ds->id_node); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds) | ||||||
|  | { | ||||||
|  | 	nfs4_print_deviceid(&mirror_ds->id_node.deviceid); | ||||||
|  | 	nfs4_pnfs_ds_put(mirror_ds->ds); | ||||||
|  | 	kfree(mirror_ds); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* Decode opaque device data and construct new_ds using it */ | ||||||
|  | struct nfs4_ff_layout_ds * | ||||||
|  | nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, | ||||||
|  | 			    gfp_t gfp_flags) | ||||||
|  | { | ||||||
|  | 	struct xdr_stream stream; | ||||||
|  | 	struct xdr_buf buf; | ||||||
|  | 	struct page *scratch; | ||||||
|  | 	struct list_head dsaddrs; | ||||||
|  | 	struct nfs4_pnfs_ds_addr *da; | ||||||
|  | 	struct nfs4_ff_layout_ds *new_ds = NULL; | ||||||
|  | 	struct nfs4_ff_ds_version *ds_versions = NULL; | ||||||
|  | 	u32 mp_count; | ||||||
|  | 	u32 version_count; | ||||||
|  | 	__be32 *p; | ||||||
|  | 	int i, ret = -ENOMEM; | ||||||
|  | 
 | ||||||
|  | 	/* set up xdr stream */ | ||||||
|  | 	scratch = alloc_page(gfp_flags); | ||||||
|  | 	if (!scratch) | ||||||
|  | 		goto out_err; | ||||||
|  | 
 | ||||||
|  | 	new_ds = kzalloc(sizeof(struct nfs4_ff_layout_ds), gfp_flags); | ||||||
|  | 	if (!new_ds) | ||||||
|  | 		goto out_scratch; | ||||||
|  | 
 | ||||||
|  | 	nfs4_init_deviceid_node(&new_ds->id_node, | ||||||
|  | 				server, | ||||||
|  | 				&pdev->dev_id); | ||||||
|  | 	INIT_LIST_HEAD(&dsaddrs); | ||||||
|  | 
 | ||||||
|  | 	xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); | ||||||
|  | 	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | ||||||
|  | 
 | ||||||
|  | 	/* multipath count */ | ||||||
|  | 	p = xdr_inline_decode(&stream, 4); | ||||||
|  | 	if (unlikely(!p)) | ||||||
|  | 		goto out_err_drain_dsaddrs; | ||||||
|  | 	mp_count = be32_to_cpup(p); | ||||||
|  | 	dprintk("%s: multipath ds count %d\n", __func__, mp_count); | ||||||
|  | 
 | ||||||
|  | 	for (i = 0; i < mp_count; i++) { | ||||||
|  | 		/* multipath ds */ | ||||||
|  | 		da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net, | ||||||
|  | 					    &stream, gfp_flags); | ||||||
|  | 		if (da) | ||||||
|  | 			list_add_tail(&da->da_node, &dsaddrs); | ||||||
|  | 	} | ||||||
|  | 	if (list_empty(&dsaddrs)) { | ||||||
|  | 		dprintk("%s: no suitable DS addresses found\n", | ||||||
|  | 			__func__); | ||||||
|  | 		ret = -ENOMEDIUM; | ||||||
|  | 		goto out_err_drain_dsaddrs; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	/* version count */ | ||||||
|  | 	p = xdr_inline_decode(&stream, 4); | ||||||
|  | 	if (unlikely(!p)) | ||||||
|  | 		goto out_err_drain_dsaddrs; | ||||||
|  | 	version_count = be32_to_cpup(p); | ||||||
|  | 	dprintk("%s: version count %d\n", __func__, version_count); | ||||||
|  | 
 | ||||||
|  | 	ds_versions = kzalloc(version_count * sizeof(struct nfs4_ff_ds_version), | ||||||
|  | 			      gfp_flags); | ||||||
|  | 	if (!ds_versions) | ||||||
|  | 		goto out_scratch; | ||||||
|  | 
 | ||||||
|  | 	for (i = 0; i < version_count; i++) { | ||||||
|  | 		/* 20 = version(4) + minor_version(4) + rsize(4) + wsize(4) +
 | ||||||
|  | 		 * tightly_coupled(4) */ | ||||||
|  | 		p = xdr_inline_decode(&stream, 20); | ||||||
|  | 		if (unlikely(!p)) | ||||||
|  | 			goto out_err_drain_dsaddrs; | ||||||
|  | 		ds_versions[i].version = be32_to_cpup(p++); | ||||||
|  | 		ds_versions[i].minor_version = be32_to_cpup(p++); | ||||||
|  | 		ds_versions[i].rsize = nfs_block_size(be32_to_cpup(p++), NULL); | ||||||
|  | 		ds_versions[i].wsize = nfs_block_size(be32_to_cpup(p++), NULL); | ||||||
|  | 		ds_versions[i].tightly_coupled = be32_to_cpup(p); | ||||||
|  | 
 | ||||||
|  | 		if (ds_versions[i].rsize > NFS_MAX_FILE_IO_SIZE) | ||||||
|  | 			ds_versions[i].rsize = NFS_MAX_FILE_IO_SIZE; | ||||||
|  | 		if (ds_versions[i].wsize > NFS_MAX_FILE_IO_SIZE) | ||||||
|  | 			ds_versions[i].wsize = NFS_MAX_FILE_IO_SIZE; | ||||||
|  | 
 | ||||||
|  | 		if (ds_versions[i].version != 3 || ds_versions[i].minor_version != 0) { | ||||||
|  | 			dprintk("%s: [%d] unsupported ds version %d-%d\n", __func__, | ||||||
|  | 				i, ds_versions[i].version, | ||||||
|  | 				ds_versions[i].minor_version); | ||||||
|  | 			ret = -EPROTONOSUPPORT; | ||||||
|  | 			goto out_err_drain_dsaddrs; | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		dprintk("%s: [%d] vers %u minor_ver %u rsize %u wsize %u coupled %d\n", | ||||||
|  | 			__func__, i, ds_versions[i].version, | ||||||
|  | 			ds_versions[i].minor_version, | ||||||
|  | 			ds_versions[i].rsize, | ||||||
|  | 			ds_versions[i].wsize, | ||||||
|  | 			ds_versions[i].tightly_coupled); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	new_ds->ds_versions = ds_versions; | ||||||
|  | 	new_ds->ds_versions_cnt = version_count; | ||||||
|  | 
 | ||||||
|  | 	new_ds->ds = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); | ||||||
|  | 	if (!new_ds->ds) | ||||||
|  | 		goto out_err_drain_dsaddrs; | ||||||
|  | 
 | ||||||
|  | 	/* If DS was already in cache, free ds addrs */ | ||||||
|  | 	while (!list_empty(&dsaddrs)) { | ||||||
|  | 		da = list_first_entry(&dsaddrs, | ||||||
|  | 				      struct nfs4_pnfs_ds_addr, | ||||||
|  | 				      da_node); | ||||||
|  | 		list_del_init(&da->da_node); | ||||||
|  | 		kfree(da->da_remotestr); | ||||||
|  | 		kfree(da); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	__free_page(scratch); | ||||||
|  | 	return new_ds; | ||||||
|  | 
 | ||||||
|  | out_err_drain_dsaddrs: | ||||||
|  | 	while (!list_empty(&dsaddrs)) { | ||||||
|  | 		da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr, | ||||||
|  | 				      da_node); | ||||||
|  | 		list_del_init(&da->da_node); | ||||||
|  | 		kfree(da->da_remotestr); | ||||||
|  | 		kfree(da); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	kfree(ds_versions); | ||||||
|  | out_scratch: | ||||||
|  | 	__free_page(scratch); | ||||||
|  | out_err: | ||||||
|  | 	kfree(new_ds); | ||||||
|  | 
 | ||||||
|  | 	dprintk("%s ERROR: returning %d\n", __func__, ret); | ||||||
|  | 	return NULL; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static u64 | ||||||
|  | end_offset(u64 start, u64 len) | ||||||
|  | { | ||||||
|  | 	u64 end; | ||||||
|  | 
 | ||||||
|  | 	end = start + len; | ||||||
|  | 	return end >= start ? end : NFS4_MAX_UINT64; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void extend_ds_error(struct nfs4_ff_layout_ds_err *err, | ||||||
|  | 			    u64 offset, u64 length) | ||||||
|  | { | ||||||
|  | 	u64 end; | ||||||
|  | 
 | ||||||
|  | 	end = max_t(u64, end_offset(err->offset, err->length), | ||||||
|  | 		    end_offset(offset, length)); | ||||||
|  | 	err->offset = min_t(u64, err->offset, offset); | ||||||
|  | 	err->length = end - err->offset; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static bool ds_error_can_merge(struct nfs4_ff_layout_ds_err *err,  u64 offset, | ||||||
|  | 			       u64 length, int status, enum nfs_opnum4 opnum, | ||||||
|  | 			       nfs4_stateid *stateid, | ||||||
|  | 			       struct nfs4_deviceid *deviceid) | ||||||
|  | { | ||||||
|  | 	return err->status == status && err->opnum == opnum && | ||||||
|  | 	       nfs4_stateid_match(&err->stateid, stateid) && | ||||||
|  | 	       !memcmp(&err->deviceid, deviceid, sizeof(*deviceid)) && | ||||||
|  | 	       end_offset(err->offset, err->length) >= offset && | ||||||
|  | 	       err->offset <= end_offset(offset, length); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static bool merge_ds_error(struct nfs4_ff_layout_ds_err *old, | ||||||
|  | 			   struct nfs4_ff_layout_ds_err *new) | ||||||
|  | { | ||||||
|  | 	if (!ds_error_can_merge(old, new->offset, new->length, new->status, | ||||||
|  | 				new->opnum, &new->stateid, &new->deviceid)) | ||||||
|  | 		return false; | ||||||
|  | 
 | ||||||
|  | 	extend_ds_error(old, new->offset, new->length); | ||||||
|  | 	return true; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static bool | ||||||
|  | ff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo, | ||||||
|  | 			      struct nfs4_ff_layout_ds_err *dserr) | ||||||
|  | { | ||||||
|  | 	struct nfs4_ff_layout_ds_err *err; | ||||||
|  | 
 | ||||||
|  | 	list_for_each_entry(err, &flo->error_list, list) { | ||||||
|  | 		if (merge_ds_error(err, dserr)) { | ||||||
|  | 			return true; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	list_add(&dserr->list, &flo->error_list); | ||||||
|  | 	return false; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static bool | ||||||
|  | ff_layout_update_ds_error(struct nfs4_flexfile_layout *flo, u64 offset, | ||||||
|  | 			  u64 length, int status, enum nfs_opnum4 opnum, | ||||||
|  | 			  nfs4_stateid *stateid, struct nfs4_deviceid *deviceid) | ||||||
|  | { | ||||||
|  | 	bool found = false; | ||||||
|  | 	struct nfs4_ff_layout_ds_err *err; | ||||||
|  | 
 | ||||||
|  | 	list_for_each_entry(err, &flo->error_list, list) { | ||||||
|  | 		if (ds_error_can_merge(err, offset, length, status, opnum, | ||||||
|  | 				       stateid, deviceid)) { | ||||||
|  | 			found = true; | ||||||
|  | 			extend_ds_error(err, offset, length); | ||||||
|  | 			break; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return found; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, | ||||||
|  | 			     struct nfs4_ff_layout_mirror *mirror, u64 offset, | ||||||
|  | 			     u64 length, int status, enum nfs_opnum4 opnum, | ||||||
|  | 			     gfp_t gfp_flags) | ||||||
|  | { | ||||||
|  | 	struct nfs4_ff_layout_ds_err *dserr; | ||||||
|  | 	bool needfree; | ||||||
|  | 
 | ||||||
|  | 	if (status == 0) | ||||||
|  | 		return 0; | ||||||
|  | 
 | ||||||
|  | 	if (mirror->mirror_ds == NULL) | ||||||
|  | 		return -EINVAL; | ||||||
|  | 
 | ||||||
|  | 	spin_lock(&flo->generic_hdr.plh_inode->i_lock); | ||||||
|  | 	if (ff_layout_update_ds_error(flo, offset, length, status, opnum, | ||||||
|  | 				      &mirror->stateid, | ||||||
|  | 				      &mirror->mirror_ds->id_node.deviceid)) { | ||||||
|  | 		spin_unlock(&flo->generic_hdr.plh_inode->i_lock); | ||||||
|  | 		return 0; | ||||||
|  | 	} | ||||||
|  | 	spin_unlock(&flo->generic_hdr.plh_inode->i_lock); | ||||||
|  | 	dserr = kmalloc(sizeof(*dserr), gfp_flags); | ||||||
|  | 	if (!dserr) | ||||||
|  | 		return -ENOMEM; | ||||||
|  | 
 | ||||||
|  | 	INIT_LIST_HEAD(&dserr->list); | ||||||
|  | 	dserr->offset = offset; | ||||||
|  | 	dserr->length = length; | ||||||
|  | 	dserr->status = status; | ||||||
|  | 	dserr->opnum = opnum; | ||||||
|  | 	nfs4_stateid_copy(&dserr->stateid, &mirror->stateid); | ||||||
|  | 	memcpy(&dserr->deviceid, &mirror->mirror_ds->id_node.deviceid, | ||||||
|  | 	       NFS4_DEVICEID4_SIZE); | ||||||
|  | 
 | ||||||
|  | 	spin_lock(&flo->generic_hdr.plh_inode->i_lock); | ||||||
|  | 	needfree = ff_layout_add_ds_error_locked(flo, dserr); | ||||||
|  | 	spin_unlock(&flo->generic_hdr.plh_inode->i_lock); | ||||||
|  | 	if (needfree) | ||||||
|  | 		kfree(dserr); | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* currently we only support AUTH_NONE and AUTH_SYS */ | ||||||
|  | static rpc_authflavor_t | ||||||
|  | nfs4_ff_layout_choose_authflavor(struct nfs4_ff_layout_mirror *mirror) | ||||||
|  | { | ||||||
|  | 	if (mirror->uid == (u32)-1) | ||||||
|  | 		return RPC_AUTH_NULL; | ||||||
|  | 	return RPC_AUTH_UNIX; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* fetch cred for NFSv3 DS */ | ||||||
|  | static int ff_layout_update_mirror_cred(struct nfs4_ff_layout_mirror *mirror, | ||||||
|  | 				      struct nfs4_pnfs_ds *ds) | ||||||
|  | { | ||||||
|  | 	if (ds->ds_clp && !mirror->cred && | ||||||
|  | 	    mirror->mirror_ds->ds_versions[0].version == 3) { | ||||||
|  | 		struct rpc_auth *auth = ds->ds_clp->cl_rpcclient->cl_auth; | ||||||
|  | 		struct rpc_cred *cred; | ||||||
|  | 		struct auth_cred acred = { | ||||||
|  | 			.uid = make_kuid(&init_user_ns, mirror->uid), | ||||||
|  | 			.gid = make_kgid(&init_user_ns, mirror->gid), | ||||||
|  | 		}; | ||||||
|  | 
 | ||||||
|  | 		/* AUTH_NULL ignores acred */ | ||||||
|  | 		cred = auth->au_ops->lookup_cred(auth, &acred, 0); | ||||||
|  | 		if (IS_ERR(cred)) { | ||||||
|  | 			dprintk("%s: lookup_cred failed with %ld\n", | ||||||
|  | 				__func__, PTR_ERR(cred)); | ||||||
|  | 			return PTR_ERR(cred); | ||||||
|  | 		} else { | ||||||
|  | 			mirror->cred = cred; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | struct nfs_fh * | ||||||
|  | nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx) | ||||||
|  | { | ||||||
|  | 	struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx); | ||||||
|  | 	struct nfs_fh *fh = NULL; | ||||||
|  | 	struct nfs4_deviceid_node *devid; | ||||||
|  | 
 | ||||||
|  | 	if (mirror == NULL || mirror->mirror_ds == NULL || | ||||||
|  | 	    mirror->mirror_ds->ds == NULL) { | ||||||
|  | 		printk(KERN_ERR "NFS: %s: No data server for mirror offset index %d\n", | ||||||
|  | 			__func__, mirror_idx); | ||||||
|  | 		if (mirror && mirror->mirror_ds) { | ||||||
|  | 			devid = &mirror->mirror_ds->id_node; | ||||||
|  | 			pnfs_generic_mark_devid_invalid(devid); | ||||||
|  | 		} | ||||||
|  | 		goto out; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	/* FIXME: For now assume there is only 1 version available for the DS */ | ||||||
|  | 	fh = &mirror->fh_versions[0]; | ||||||
|  | out: | ||||||
|  | 	return fh; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* Upon return, either ds is connected, or ds is NULL. | ||||||
|  |  * | ||||||
|  |  * Looks up the mirror at @ds_idx in @lseg and returns its data server: | ||||||
|  |  *  - Returns NULL (after logging) when the mirror, its mirror_ds or the | ||||||
|  |  *    mirror_ds->ds pointer is missing; if mirror_ds exists, its deviceid | ||||||
|  |  *    is marked invalid first. | ||||||
|  |  *  - Returns NULL when ff_layout_test_devid_unavailable() reports the | ||||||
|  |  *    deviceid as unavailable. | ||||||
|  |  *  - Returns ds straight away when ds->ds_clp is already set. | ||||||
|  |  *  - Otherwise calls nfs4_pnfs_ds_connect() using ds_versions[0]. On | ||||||
|  |  *    success, rsize/wsize of ds_versions[0] are capped at max_payload | ||||||
|  |  *    (derived from rpc_max_payload()). On failure, an NFS4ERR_NXIO error | ||||||
|  |  *    is tracked and, depending on @fail_return and | ||||||
|  |  *    ff_layout_has_available_ds(), the layout is marked for return, | ||||||
|  |  *    retry-layoutget is set or cleared, or NFS_LAYOUT_RETURN_BEFORE_CLOSE | ||||||
|  |  *    is set. | ||||||
|  |  * Finally ds is reset to NULL if ff_layout_update_mirror_cred() returns | ||||||
|  |  * non-zero. | ||||||
|  |  * | ||||||
|  |  * NOTE(review): after a failed connect the code still falls through to | ||||||
|  |  * ff_layout_update_mirror_cred() with ds->ds_clp == NULL and returns ds | ||||||
|  |  * unless that helper returns non-zero - confirm this upholds the | ||||||
|  |  * "connected or NULL" contract stated above. | ||||||
|  |  */ | ||||||
|  | struct nfs4_pnfs_ds * | ||||||
|  | nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, | ||||||
|  | 			  bool fail_return) | ||||||
|  | { | ||||||
|  | 	struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); | ||||||
|  | 	struct nfs4_pnfs_ds *ds = NULL; | ||||||
|  | 	struct nfs4_deviceid_node *devid; | ||||||
|  | 	struct inode *ino = lseg->pls_layout->plh_inode; | ||||||
|  | 	struct nfs_server *s = NFS_SERVER(ino); | ||||||
|  | 	unsigned int max_payload; | ||||||
|  | 	rpc_authflavor_t flavor; | ||||||
|  | 
 | ||||||
|  | 	if (mirror == NULL || mirror->mirror_ds == NULL || | ||||||
|  | 	    mirror->mirror_ds->ds == NULL) { | ||||||
|  | 		printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", | ||||||
|  | 			__func__, ds_idx); | ||||||
|  | 		if (mirror && mirror->mirror_ds) { | ||||||
|  | 			devid = &mirror->mirror_ds->id_node; | ||||||
|  | 			pnfs_generic_mark_devid_invalid(devid); | ||||||
|  | 		} | ||||||
|  | 		goto out; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	devid = &mirror->mirror_ds->id_node; | ||||||
|  | 	if (ff_layout_test_devid_unavailable(devid)) | ||||||
|  | 		goto out; | ||||||
|  | 
 | ||||||
|  | 	ds = mirror->mirror_ds->ds; | ||||||
|  | 	/* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ | ||||||
|  | 	smp_rmb(); | ||||||
|  | 	if (ds->ds_clp) | ||||||
|  | 		goto out; | ||||||
|  | 
 | ||||||
|  | 	flavor = nfs4_ff_layout_choose_authflavor(mirror); | ||||||
|  | 
 | ||||||
|  | 	/* FIXME: For now we assume the server sent only one version of NFS
 | ||||||
|  | 	 * to use for the DS. | ||||||
|  | 	 */ | ||||||
|  | 	nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, | ||||||
|  | 			     dataserver_retrans, | ||||||
|  | 			     mirror->mirror_ds->ds_versions[0].version, | ||||||
|  | 			     mirror->mirror_ds->ds_versions[0].minor_version, | ||||||
|  | 			     flavor); | ||||||
|  | 
 | ||||||
|  | 	/* connect success, check rsize/wsize limit */ | ||||||
|  | 	if (ds->ds_clp) { | ||||||
|  | 		max_payload = | ||||||
|  | 			nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient), | ||||||
|  | 				       NULL); | ||||||
|  | 		if (mirror->mirror_ds->ds_versions[0].rsize > max_payload) | ||||||
|  | 			mirror->mirror_ds->ds_versions[0].rsize = max_payload; | ||||||
|  | 		if (mirror->mirror_ds->ds_versions[0].wsize > max_payload) | ||||||
|  | 			mirror->mirror_ds->ds_versions[0].wsize = max_payload; | ||||||
|  | 	} else { | ||||||
|  | 		ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), | ||||||
|  | 					 mirror, lseg->pls_range.offset, | ||||||
|  | 					 lseg->pls_range.length, NFS4ERR_NXIO, | ||||||
|  | 					 OP_ILLEGAL, GFP_NOIO); | ||||||
|  | 		if (fail_return) { | ||||||
|  | 			pnfs_error_mark_layout_for_return(ino, lseg); | ||||||
|  | 			if (ff_layout_has_available_ds(lseg)) | ||||||
|  | 				pnfs_set_retry_layoutget(lseg->pls_layout); | ||||||
|  | 			else | ||||||
|  | 				pnfs_clear_retry_layoutget(lseg->pls_layout); | ||||||
|  | 
 | ||||||
|  | 		} else { | ||||||
|  | 			if (ff_layout_has_available_ds(lseg)) | ||||||
|  | 				set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, | ||||||
|  | 					&lseg->pls_layout->plh_flags); | ||||||
|  | 			else { | ||||||
|  | 				pnfs_error_mark_layout_for_return(ino, lseg); | ||||||
|  | 				pnfs_clear_retry_layoutget(lseg->pls_layout); | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if (ff_layout_update_mirror_cred(mirror, ds)) | ||||||
|  | 		ds = NULL; | ||||||
|  | out: | ||||||
|  | 	return ds; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* | ||||||
|  |  * Pick the credential to use for the data server of mirror @ds_idx. | ||||||
|  |  * | ||||||
|  |  * Returns ERR_PTR(-EINVAL) when nfs4_ff_layout_prepare_ds() yields no data | ||||||
|  |  * server; otherwise the mirror's own cred if it has one, else @mdscred. | ||||||
|  |  */ | ||||||
|  | struct rpc_cred * | ||||||
|  | ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx, | ||||||
|  | 		      struct rpc_cred *mdscred) | ||||||
|  | { | ||||||
|  | 	struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); | ||||||
|  | 
 | ||||||
|  | 	/* no data server could be prepared for this mirror */ | ||||||
|  | 	if (nfs4_ff_layout_prepare_ds(lseg, ds_idx, true) == NULL) | ||||||
|  | 		return ERR_PTR(-EINVAL); | ||||||
|  | 
 | ||||||
|  | 	/* a per-mirror credential takes precedence over the MDS one */ | ||||||
|  | 	return (mirror != NULL && mirror->cred != NULL) ? mirror->cred : mdscred; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* | ||||||
|  |  * Return the rpc client to use towards the data server of mirror @ds_idx. | ||||||
|  |  * | ||||||
|  |  * NFSv4 DS: find or create a DS rpc client via | ||||||
|  |  * nfs4_find_or_create_ds_client(). | ||||||
|  |  * NFSv3 DS: hand back @ds_clp's own rpc client. | ||||||
|  |  * Any other value of ds_versions[0].version hits BUG(). | ||||||
|  |  */ | ||||||
|  | struct rpc_clnt * | ||||||
|  | nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg, u32 ds_idx, | ||||||
|  | 				 struct nfs_client *ds_clp, struct inode *inode) | ||||||
|  | { | ||||||
|  | 	struct nfs4_ff_layout_mirror *m = FF_LAYOUT_COMP(lseg, ds_idx); | ||||||
|  | 
 | ||||||
|  | 	switch (m->mirror_ds->ds_versions[0].version) { | ||||||
|  | 	case 4: | ||||||
|  | 		return nfs4_find_or_create_ds_client(ds_clp, inode); | ||||||
|  | 	case 3: | ||||||
|  | 		/* For NFSv3 DS, flavor is set when creating DS connections */ | ||||||
|  | 		return ds_clp->cl_rpcclient; | ||||||
|  | 	default: | ||||||
|  | 		BUG(); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* | ||||||
|  |  * Report whether [offset1, offset1+length1) and [offset2, offset2+length2) | ||||||
|  |  * overlap. An end of NFS4_MAX_UINT64 (as computed by end_offset()) is never | ||||||
|  |  * treated as lying before the other range's start. | ||||||
|  |  */ | ||||||
|  | static bool is_range_intersecting(u64 offset1, u64 length1, | ||||||
|  | 				  u64 offset2, u64 length2) | ||||||
|  | { | ||||||
|  | 	u64 last1 = end_offset(offset1, length1); | ||||||
|  | 	u64 last2 = end_offset(offset2, length2); | ||||||
|  | 
 | ||||||
|  | 	/* range 1 stops at or before the start of range 2 */ | ||||||
|  | 	if (last1 != NFS4_MAX_UINT64 && last1 <= offset2) | ||||||
|  | 		return false; | ||||||
|  | 	/* range 2 stops at or before the start of range 1 */ | ||||||
|  | 	if (last2 != NFS4_MAX_UINT64 && last2 <= offset1) | ||||||
|  | 		return false; | ||||||
|  | 	return true; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* | ||||||
|  |  * Encode the tracked DS I/O errors that intersect @range into @xdr. | ||||||
|  |  * | ||||||
|  |  * Called with inode i_lock held. | ||||||
|  |  * | ||||||
|  |  * Every entry on @flo->error_list whose range intersects @range is encoded | ||||||
|  |  * (offset, length, stateid, deviceid, status, opnum), counted in *@count, | ||||||
|  |  * then unlinked from the list and freed. | ||||||
|  |  * | ||||||
|  |  * Returns 0 on success or -ENOBUFS when xdr_reserve_space() fails; entries | ||||||
|  |  * encoded before the failure have already been removed and counted. | ||||||
|  |  */ | ||||||
|  | int ff_layout_encode_ds_ioerr(struct nfs4_flexfile_layout *flo, | ||||||
|  | 			      struct xdr_stream *xdr, int *count, | ||||||
|  | 			      const struct pnfs_layout_range *range) | ||||||
|  | { | ||||||
|  | 	struct nfs4_ff_layout_ds_err *err, *n; | ||||||
|  | 	__be32 *p; | ||||||
|  | 
 | ||||||
|  | 	list_for_each_entry_safe(err, n, &flo->error_list, list) { | ||||||
|  | 		if (!is_range_intersecting(err->offset, err->length, | ||||||
|  | 					   range->offset, range->length)) | ||||||
|  | 			continue; | ||||||
|  | 		/* offset(8) + length(8) + stateid(NFS4_STATEID_SIZE) | ||||||
|  | 		 * + deviceid(NFS4_DEVICEID4_SIZE) + status(4) + opnum(4) | ||||||
|  | 		 */ | ||||||
|  | 		p = xdr_reserve_space(xdr, | ||||||
|  | 				24 + NFS4_STATEID_SIZE + NFS4_DEVICEID4_SIZE); | ||||||
|  | 		if (unlikely(!p)) | ||||||
|  | 			return -ENOBUFS; | ||||||
|  | 		p = xdr_encode_hyper(p, err->offset); | ||||||
|  | 		p = xdr_encode_hyper(p, err->length); | ||||||
|  | 		p = xdr_encode_opaque_fixed(p, &err->stateid, | ||||||
|  | 					    NFS4_STATEID_SIZE); | ||||||
|  | 		p = xdr_encode_opaque_fixed(p, &err->deviceid, | ||||||
|  | 					    NFS4_DEVICEID4_SIZE); | ||||||
|  | 		*p++ = cpu_to_be32(err->status); | ||||||
|  | 		*p++ = cpu_to_be32(err->opnum); | ||||||
|  | 		*count += 1; | ||||||
|  | 		list_del(&err->list); | ||||||
|  | 		/* log before freeing: the message reads fields of err */ | ||||||
|  | 		dprintk("%s: offset %llu length %llu status %d op %d count %d\n", | ||||||
|  | 			__func__, err->offset, err->length, err->status, | ||||||
|  | 			err->opnum, *count); | ||||||
|  | 		kfree(err); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* | ||||||
|  |  * Return true if at least one mirror of @lseg has a mirror_ds whose deviceid | ||||||
|  |  * is not reported unavailable by ff_layout_test_devid_unavailable(). | ||||||
|  |  */ | ||||||
|  | bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg) | ||||||
|  | { | ||||||
|  | 	int i; | ||||||
|  | 
 | ||||||
|  | 	for (i = 0; i < FF_LAYOUT_MIRROR_COUNT(lseg); i++) { | ||||||
|  | 		struct nfs4_ff_layout_mirror *m = FF_LAYOUT_COMP(lseg, i); | ||||||
|  | 
 | ||||||
|  | 		/* mirrors without a data server entry cannot be available */ | ||||||
|  | 		if (m == NULL || m->mirror_ds == NULL) | ||||||
|  | 			continue; | ||||||
|  | 		if (!ff_layout_test_devid_unavailable(&m->mirror_ds->id_node)) | ||||||
|  | 			return true; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return false; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* Module parameters, both declared as uint with permissions 0644. They are | ||||||
|  |  * the timeout/retransmit values passed to nfs4_pnfs_ds_connect() in this | ||||||
|  |  * file. | ||||||
|  |  */ | ||||||
|  | module_param(dataserver_retrans, uint, 0644); | ||||||
|  | MODULE_PARM_DESC(dataserver_retrans, "The  number of times the NFSv4.1 client " | ||||||
|  | 			"retries a request before it attempts further " | ||||||
|  | 			" recovery  action."); | ||||||
|  | module_param(dataserver_timeo, uint, 0644); | ||||||
|  | MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the " | ||||||
|  | 			"NFSv4.1  client  waits for a response from a " | ||||||
|  | 			" data server before it retries an NFS request."); | ||||||
| @ -152,7 +152,7 @@ void nfs_fattr_map_and_free_names(struct nfs_server *server, struct nfs_fattr *f | |||||||
| 		nfs_fattr_free_group_name(fattr); | 		nfs_fattr_free_group_name(fattr); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res) | int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res) | ||||||
| { | { | ||||||
| 	unsigned long val; | 	unsigned long val; | ||||||
| 	char buf[16]; | 	char buf[16]; | ||||||
| @ -166,6 +166,7 @@ static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *re | |||||||
| 	*res = val; | 	*res = val; | ||||||
| 	return 1; | 	return 1; | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL_GPL(nfs_map_string_to_numeric); | ||||||
| 
 | 
 | ||||||
| static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen) | static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen) | ||||||
| { | { | ||||||
|  | |||||||
| @ -6,6 +6,7 @@ | |||||||
| #include <linux/mount.h> | #include <linux/mount.h> | ||||||
| #include <linux/security.h> | #include <linux/security.h> | ||||||
| #include <linux/crc32.h> | #include <linux/crc32.h> | ||||||
|  | #include <linux/nfs_page.h> | ||||||
| 
 | 
 | ||||||
| #define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) | #define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) | ||||||
| 
 | 
 | ||||||
| @ -187,9 +188,15 @@ extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, | |||||||
| 					     const struct sockaddr *ds_addr, | 					     const struct sockaddr *ds_addr, | ||||||
| 					     int ds_addrlen, int ds_proto, | 					     int ds_addrlen, int ds_proto, | ||||||
| 					     unsigned int ds_timeo, | 					     unsigned int ds_timeo, | ||||||
| 					     unsigned int ds_retrans); | 					     unsigned int ds_retrans, | ||||||
|  | 					     u32 minor_version, | ||||||
|  | 					     rpc_authflavor_t au_flavor); | ||||||
| extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *, | extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *, | ||||||
| 						struct inode *); | 						struct inode *); | ||||||
|  | extern struct nfs_client *nfs3_set_ds_client(struct nfs_client *mds_clp, | ||||||
|  | 			const struct sockaddr *ds_addr, int ds_addrlen, | ||||||
|  | 			int ds_proto, unsigned int ds_timeo, | ||||||
|  | 			unsigned int ds_retrans, rpc_authflavor_t au_flavor); | ||||||
| #ifdef CONFIG_PROC_FS | #ifdef CONFIG_PROC_FS | ||||||
| extern int __init nfs_fs_proc_init(void); | extern int __init nfs_fs_proc_init(void); | ||||||
| extern void nfs_fs_proc_exit(void); | extern void nfs_fs_proc_exit(void); | ||||||
| @ -242,9 +249,12 @@ struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *); | |||||||
| void nfs_pgio_header_free(struct nfs_pgio_header *); | void nfs_pgio_header_free(struct nfs_pgio_header *); | ||||||
| void nfs_pgio_data_destroy(struct nfs_pgio_header *); | void nfs_pgio_data_destroy(struct nfs_pgio_header *); | ||||||
| int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); | int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); | ||||||
| int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_header *, | int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, | ||||||
| 		      const struct rpc_call_ops *, int, int); | 		      struct rpc_cred *cred, const struct nfs_rpc_ops *rpc_ops, | ||||||
|  | 		      const struct rpc_call_ops *call_ops, int how, int flags); | ||||||
| void nfs_free_request(struct nfs_page *req); | void nfs_free_request(struct nfs_page *req); | ||||||
|  | struct nfs_pgio_mirror * | ||||||
|  | nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc); | ||||||
| 
 | 
 | ||||||
| static inline void nfs_iocounter_init(struct nfs_io_counter *c) | static inline void nfs_iocounter_init(struct nfs_io_counter *c) | ||||||
| { | { | ||||||
| @ -252,6 +262,12 @@ static inline void nfs_iocounter_init(struct nfs_io_counter *c) | |||||||
| 	atomic_set(&c->io_count, 0); | 	atomic_set(&c->io_count, 0); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /* | ||||||
|  |  * True when @desc has more than one mirror. A mirror count below 1 | ||||||
|  |  * triggers a one-time warning and is reported as "no mirroring". | ||||||
|  |  */ | ||||||
|  | static inline bool nfs_pgio_has_mirroring(struct nfs_pageio_descriptor *desc) | ||||||
|  | { | ||||||
|  | 	if (desc->pg_mirror_count > 1) | ||||||
|  | 		return true; | ||||||
|  | 	WARN_ON_ONCE(desc->pg_mirror_count < 1); | ||||||
|  | 	return false; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /* nfs2xdr.c */ | /* nfs2xdr.c */ | ||||||
| extern struct rpc_procinfo nfs_procedures[]; | extern struct rpc_procinfo nfs_procedures[]; | ||||||
| extern int nfs2_decode_dirent(struct xdr_stream *, | extern int nfs2_decode_dirent(struct xdr_stream *, | ||||||
| @ -427,6 +443,7 @@ extern void nfs_write_prepare(struct rpc_task *task, void *calldata); | |||||||
| extern void nfs_commit_prepare(struct rpc_task *task, void *calldata); | extern void nfs_commit_prepare(struct rpc_task *task, void *calldata); | ||||||
| extern int nfs_initiate_commit(struct rpc_clnt *clnt, | extern int nfs_initiate_commit(struct rpc_clnt *clnt, | ||||||
| 			       struct nfs_commit_data *data, | 			       struct nfs_commit_data *data, | ||||||
|  | 			       const struct nfs_rpc_ops *nfs_ops, | ||||||
| 			       const struct rpc_call_ops *call_ops, | 			       const struct rpc_call_ops *call_ops, | ||||||
| 			       int how, int flags); | 			       int how, int flags); | ||||||
| extern void nfs_init_commit(struct nfs_commit_data *data, | extern void nfs_init_commit(struct nfs_commit_data *data, | ||||||
| @ -440,13 +457,15 @@ int nfs_scan_commit(struct inode *inode, struct list_head *dst, | |||||||
| 		    struct nfs_commit_info *cinfo); | 		    struct nfs_commit_info *cinfo); | ||||||
| void nfs_mark_request_commit(struct nfs_page *req, | void nfs_mark_request_commit(struct nfs_page *req, | ||||||
| 			     struct pnfs_layout_segment *lseg, | 			     struct pnfs_layout_segment *lseg, | ||||||
| 			     struct nfs_commit_info *cinfo); | 			     struct nfs_commit_info *cinfo, | ||||||
|  | 			     u32 ds_commit_idx); | ||||||
| int nfs_write_need_commit(struct nfs_pgio_header *); | int nfs_write_need_commit(struct nfs_pgio_header *); | ||||||
| int nfs_generic_commit_list(struct inode *inode, struct list_head *head, | int nfs_generic_commit_list(struct inode *inode, struct list_head *head, | ||||||
| 			    int how, struct nfs_commit_info *cinfo); | 			    int how, struct nfs_commit_info *cinfo); | ||||||
| void nfs_retry_commit(struct list_head *page_list, | void nfs_retry_commit(struct list_head *page_list, | ||||||
| 		      struct pnfs_layout_segment *lseg, | 		      struct pnfs_layout_segment *lseg, | ||||||
| 		      struct nfs_commit_info *cinfo); | 		      struct nfs_commit_info *cinfo, | ||||||
|  | 		      u32 ds_commit_idx); | ||||||
| void nfs_commitdata_release(struct nfs_commit_data *data); | void nfs_commitdata_release(struct nfs_commit_data *data); | ||||||
| void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, | void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, | ||||||
| 				 struct nfs_commit_info *cinfo); | 				 struct nfs_commit_info *cinfo); | ||||||
| @ -457,6 +476,7 @@ void nfs_init_cinfo(struct nfs_commit_info *cinfo, | |||||||
| 		    struct nfs_direct_req *dreq); | 		    struct nfs_direct_req *dreq); | ||||||
| int nfs_key_timeout_notify(struct file *filp, struct inode *inode); | int nfs_key_timeout_notify(struct file *filp, struct inode *inode); | ||||||
| bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx); | bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx); | ||||||
|  | void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio); | ||||||
| 
 | 
 | ||||||
| #ifdef CONFIG_MIGRATION | #ifdef CONFIG_MIGRATION | ||||||
| extern int nfs_migrate_page(struct address_space *, | extern int nfs_migrate_page(struct address_space *, | ||||||
| @ -480,6 +500,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode) | |||||||
| 	inode_dio_wait(inode); | 	inode_dio_wait(inode); | ||||||
| } | } | ||||||
| extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); | extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); | ||||||
|  | extern void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq); | ||||||
| 
 | 
 | ||||||
| /* nfs4proc.c */ | /* nfs4proc.c */ | ||||||
| extern void __nfs4_read_done_cb(struct nfs_pgio_header *); | extern void __nfs4_read_done_cb(struct nfs_pgio_header *); | ||||||
|  | |||||||
| @ -481,7 +481,8 @@ out_overflow: | |||||||
|  *		void; |  *		void; | ||||||
|  *	}; |  *	}; | ||||||
|  */ |  */ | ||||||
| static int decode_attrstat(struct xdr_stream *xdr, struct nfs_fattr *result) | static int decode_attrstat(struct xdr_stream *xdr, struct nfs_fattr *result, | ||||||
|  | 			   __u32 *op_status) | ||||||
| { | { | ||||||
| 	enum nfs_stat status; | 	enum nfs_stat status; | ||||||
| 	int error; | 	int error; | ||||||
| @ -489,6 +490,8 @@ static int decode_attrstat(struct xdr_stream *xdr, struct nfs_fattr *result) | |||||||
| 	error = decode_stat(xdr, &status); | 	error = decode_stat(xdr, &status); | ||||||
| 	if (unlikely(error)) | 	if (unlikely(error)) | ||||||
| 		goto out; | 		goto out; | ||||||
|  | 	if (op_status) | ||||||
|  | 		*op_status = status; | ||||||
| 	if (status != NFS_OK) | 	if (status != NFS_OK) | ||||||
| 		goto out_default; | 		goto out_default; | ||||||
| 	error = decode_fattr(xdr, result); | 	error = decode_fattr(xdr, result); | ||||||
| @ -808,7 +811,7 @@ out_default: | |||||||
| static int nfs2_xdr_dec_attrstat(struct rpc_rqst *req, struct xdr_stream *xdr, | static int nfs2_xdr_dec_attrstat(struct rpc_rqst *req, struct xdr_stream *xdr, | ||||||
| 				 struct nfs_fattr *result) | 				 struct nfs_fattr *result) | ||||||
| { | { | ||||||
| 	return decode_attrstat(xdr, result); | 	return decode_attrstat(xdr, result, NULL); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int nfs2_xdr_dec_diropres(struct rpc_rqst *req, struct xdr_stream *xdr, | static int nfs2_xdr_dec_diropres(struct rpc_rqst *req, struct xdr_stream *xdr, | ||||||
| @ -865,6 +868,7 @@ static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr, | |||||||
| 	error = decode_stat(xdr, &status); | 	error = decode_stat(xdr, &status); | ||||||
| 	if (unlikely(error)) | 	if (unlikely(error)) | ||||||
| 		goto out; | 		goto out; | ||||||
|  | 	result->op_status = status; | ||||||
| 	if (status != NFS_OK) | 	if (status != NFS_OK) | ||||||
| 		goto out_default; | 		goto out_default; | ||||||
| 	error = decode_fattr(xdr, result->fattr); | 	error = decode_fattr(xdr, result->fattr); | ||||||
| @ -882,7 +886,7 @@ static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr, | |||||||
| { | { | ||||||
| 	/* All NFSv2 writes are "file sync" writes */ | 	/* All NFSv2 writes are "file sync" writes */ | ||||||
| 	result->verf->committed = NFS_FILE_SYNC; | 	result->verf->committed = NFS_FILE_SYNC; | ||||||
| 	return decode_attrstat(xdr, result->fattr); | 	return decode_attrstat(xdr, result->fattr, &result->op_status); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /**
 | /**
 | ||||||
|  | |||||||
| @ -30,5 +30,7 @@ struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subver | |||||||
| struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *, | struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *, | ||||||
| 				     struct nfs_fattr *, rpc_authflavor_t); | 				     struct nfs_fattr *, rpc_authflavor_t); | ||||||
| 
 | 
 | ||||||
|  | /* nfs3super.c */ | ||||||
|  | extern struct nfs_subversion nfs_v3; | ||||||
| 
 | 
 | ||||||
| #endif /* __LINUX_FS_NFS_NFS3_FS_H */ | #endif /* __LINUX_FS_NFS_NFS3_FS_H */ | ||||||
|  | |||||||
| @ -1,5 +1,6 @@ | |||||||
| #include <linux/nfs_fs.h> | #include <linux/nfs_fs.h> | ||||||
| #include <linux/nfs_mount.h> | #include <linux/nfs_mount.h> | ||||||
|  | #include <linux/sunrpc/addr.h> | ||||||
| #include "internal.h" | #include "internal.h" | ||||||
| #include "nfs3_fs.h" | #include "nfs3_fs.h" | ||||||
| 
 | 
 | ||||||
| @ -64,3 +65,43 @@ struct nfs_server *nfs3_clone_server(struct nfs_server *source, | |||||||
| 		nfs_init_server_aclclient(server); | 		nfs_init_server_aclclient(server); | ||||||
| 	return server; | 	return server; | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | /* | ||||||
|  |  * Set up a pNFS Data Server client over NFSv3. | ||||||
|  |  * | ||||||
|  |  * The nfs_client is obtained through nfs_get_client(), which returns any | ||||||
|  |  * existing nfs_client that matches server address, port, version and | ||||||
|  |  * minorversion. | ||||||
|  |  * | ||||||
|  |  * For a new nfs_client, use a soft mount (default), a low retrans and a | ||||||
|  |  * low timeout interval so that if a connection is lost, we retry through | ||||||
|  |  * the MDS. | ||||||
|  |  * | ||||||
|  |  * Returns ERR_PTR(-EINVAL) if @ds_addr cannot be rendered as a string by | ||||||
|  |  * rpc_ntop(); otherwise whatever nfs_get_client() returns. | ||||||
|  |  */ | ||||||
|  | struct nfs_client *nfs3_set_ds_client(struct nfs_client *mds_clp, | ||||||
|  | 		const struct sockaddr *ds_addr, int ds_addrlen, | ||||||
|  | 		int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans, | ||||||
|  | 		rpc_authflavor_t au_flavor) | ||||||
|  | { | ||||||
|  | 	char hostname[INET6_ADDRSTRLEN + 1]; | ||||||
|  | 	struct rpc_timeout timeparms; | ||||||
|  | 	struct nfs_client_initdata cl_init = { | ||||||
|  | 		.net = mds_clp->cl_net, | ||||||
|  | 		.nfs_mod = &nfs_v3, | ||||||
|  | 		.proto = ds_proto, | ||||||
|  | 		.addr = ds_addr, | ||||||
|  | 		.addrlen = ds_addrlen, | ||||||
|  | 	}; | ||||||
|  | 
 | ||||||
|  | 	/* lockd wants a hostname, so fake one from the DS address */ | ||||||
|  | 	if (rpc_ntop(ds_addr, hostname, sizeof(hostname)) <= 0) | ||||||
|  | 		return ERR_PTR(-EINVAL); | ||||||
|  | 	cl_init.hostname = hostname; | ||||||
|  | 
 | ||||||
|  | 	nfs_init_timeout_values(&timeparms, ds_proto, ds_timeo, ds_retrans); | ||||||
|  | 
 | ||||||
|  | 	/* Use the MDS nfs_client cl_ipaddr. */ | ||||||
|  | 	return nfs_get_client(&cl_init, &timeparms, mds_clp->cl_ipaddr, | ||||||
|  | 			      au_flavor); | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL_GPL(nfs3_set_ds_client); | ||||||
|  | |||||||
| @ -800,6 +800,9 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) | |||||||
| { | { | ||||||
| 	struct inode *inode = hdr->inode; | 	struct inode *inode = hdr->inode; | ||||||
| 
 | 
 | ||||||
|  | 	if (hdr->pgio_done_cb != NULL) | ||||||
|  | 		return hdr->pgio_done_cb(task, hdr); | ||||||
|  | 
 | ||||||
| 	if (nfs3_async_handle_jukebox(task, inode)) | 	if (nfs3_async_handle_jukebox(task, inode)) | ||||||
| 		return -EAGAIN; | 		return -EAGAIN; | ||||||
| 
 | 
 | ||||||
| @ -825,6 +828,9 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) | |||||||
| { | { | ||||||
| 	struct inode *inode = hdr->inode; | 	struct inode *inode = hdr->inode; | ||||||
| 
 | 
 | ||||||
|  | 	if (hdr->pgio_done_cb != NULL) | ||||||
|  | 		return hdr->pgio_done_cb(task, hdr); | ||||||
|  | 
 | ||||||
| 	if (nfs3_async_handle_jukebox(task, inode)) | 	if (nfs3_async_handle_jukebox(task, inode)) | ||||||
| 		return -EAGAIN; | 		return -EAGAIN; | ||||||
| 	if (task->tk_status >= 0) | 	if (task->tk_status >= 0) | ||||||
| @ -845,6 +851,9 @@ static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commi | |||||||
| 
 | 
 | ||||||
| static int nfs3_commit_done(struct rpc_task *task, struct nfs_commit_data *data) | static int nfs3_commit_done(struct rpc_task *task, struct nfs_commit_data *data) | ||||||
| { | { | ||||||
|  | 	if (data->commit_done_cb != NULL) | ||||||
|  | 		return data->commit_done_cb(task, data); | ||||||
|  | 
 | ||||||
| 	if (nfs3_async_handle_jukebox(task, data->inode)) | 	if (nfs3_async_handle_jukebox(task, data->inode)) | ||||||
| 		return -EAGAIN; | 		return -EAGAIN; | ||||||
| 	nfs_refresh_inode(data->inode, data->res.fattr); | 	nfs_refresh_inode(data->inode, data->res.fattr); | ||||||
|  | |||||||
| @ -7,7 +7,7 @@ | |||||||
| #include "nfs3_fs.h" | #include "nfs3_fs.h" | ||||||
| #include "nfs.h" | #include "nfs.h" | ||||||
| 
 | 
 | ||||||
| static struct nfs_subversion nfs_v3 = { | struct nfs_subversion nfs_v3 = { | ||||||
| 	.owner = THIS_MODULE, | 	.owner = THIS_MODULE, | ||||||
| 	.nfs_fs   = &nfs_fs_type, | 	.nfs_fs   = &nfs_fs_type, | ||||||
| 	.rpc_vers = &nfs_version3, | 	.rpc_vers = &nfs_version3, | ||||||
|  | |||||||
| @ -1636,6 +1636,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr, | |||||||
| 	error = decode_post_op_attr(xdr, result->fattr); | 	error = decode_post_op_attr(xdr, result->fattr); | ||||||
| 	if (unlikely(error)) | 	if (unlikely(error)) | ||||||
| 		goto out; | 		goto out; | ||||||
|  | 	result->op_status = status; | ||||||
| 	if (status != NFS3_OK) | 	if (status != NFS3_OK) | ||||||
| 		goto out_status; | 		goto out_status; | ||||||
| 	error = decode_read3resok(xdr, result); | 	error = decode_read3resok(xdr, result); | ||||||
| @ -1708,6 +1709,7 @@ static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr, | |||||||
| 	error = decode_wcc_data(xdr, result->fattr); | 	error = decode_wcc_data(xdr, result->fattr); | ||||||
| 	if (unlikely(error)) | 	if (unlikely(error)) | ||||||
| 		goto out; | 		goto out; | ||||||
|  | 	result->op_status = status; | ||||||
| 	if (status != NFS3_OK) | 	if (status != NFS3_OK) | ||||||
| 		goto out_status; | 		goto out_status; | ||||||
| 	error = decode_write3resok(xdr, result); | 	error = decode_write3resok(xdr, result); | ||||||
| @ -2323,6 +2325,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, | |||||||
| 	error = decode_wcc_data(xdr, result->fattr); | 	error = decode_wcc_data(xdr, result->fattr); | ||||||
| 	if (unlikely(error)) | 	if (unlikely(error)) | ||||||
| 		goto out; | 		goto out; | ||||||
|  | 	result->op_status = status; | ||||||
| 	if (status != NFS3_OK) | 	if (status != NFS3_OK) | ||||||
| 		goto out_status; | 		goto out_status; | ||||||
| 	error = decode_writeverf3(xdr, &result->verf->verifier); | 	error = decode_writeverf3(xdr, &result->verf->verifier); | ||||||
|  | |||||||
| @ -446,6 +446,12 @@ extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid); | |||||||
| extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); | extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); | ||||||
| extern void nfs_release_seqid(struct nfs_seqid *seqid); | extern void nfs_release_seqid(struct nfs_seqid *seqid); | ||||||
| extern void nfs_free_seqid(struct nfs_seqid *seqid); | extern void nfs_free_seqid(struct nfs_seqid *seqid); | ||||||
|  | extern int nfs40_setup_sequence(struct nfs4_slot_table *tbl, | ||||||
|  | 				struct nfs4_sequence_args *args, | ||||||
|  | 				struct nfs4_sequence_res *res, | ||||||
|  | 				struct rpc_task *task); | ||||||
|  | extern int nfs4_sequence_done(struct rpc_task *task, | ||||||
|  | 			      struct nfs4_sequence_res *res); | ||||||
| 
 | 
 | ||||||
| extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp); | extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp); | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -849,14 +849,15 @@ error: | |||||||
|  */ |  */ | ||||||
| struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, | struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, | ||||||
| 		const struct sockaddr *ds_addr, int ds_addrlen, | 		const struct sockaddr *ds_addr, int ds_addrlen, | ||||||
| 		int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans) | 		int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans, | ||||||
|  | 		u32 minor_version, rpc_authflavor_t au_flavor) | ||||||
| { | { | ||||||
| 	struct nfs_client_initdata cl_init = { | 	struct nfs_client_initdata cl_init = { | ||||||
| 		.addr = ds_addr, | 		.addr = ds_addr, | ||||||
| 		.addrlen = ds_addrlen, | 		.addrlen = ds_addrlen, | ||||||
| 		.nfs_mod = &nfs_v4, | 		.nfs_mod = &nfs_v4, | ||||||
| 		.proto = ds_proto, | 		.proto = ds_proto, | ||||||
| 		.minorversion = mds_clp->cl_minorversion, | 		.minorversion = minor_version, | ||||||
| 		.net = mds_clp->cl_net, | 		.net = mds_clp->cl_net, | ||||||
| 	}; | 	}; | ||||||
| 	struct rpc_timeout ds_timeout; | 	struct rpc_timeout ds_timeout; | ||||||
| @ -874,7 +875,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, | |||||||
| 	 */ | 	 */ | ||||||
| 	nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); | 	nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); | ||||||
| 	clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr, | 	clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr, | ||||||
| 			     mds_clp->cl_rpcclient->cl_auth->au_flavor); | 			     au_flavor); | ||||||
| 
 | 
 | ||||||
| 	dprintk("<-- %s %p\n", __func__, clp); | 	dprintk("<-- %s %p\n", __func__, clp); | ||||||
| 	return clp; | 	return clp; | ||||||
|  | |||||||
| @ -495,12 +495,11 @@ static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args) | |||||||
| 	args->sa_privileged = 1; | 	args->sa_privileged = 1; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int nfs40_setup_sequence(const struct nfs_server *server, | int nfs40_setup_sequence(struct nfs4_slot_table *tbl, | ||||||
| 			 struct nfs4_sequence_args *args, | 			 struct nfs4_sequence_args *args, | ||||||
| 			 struct nfs4_sequence_res *res, | 			 struct nfs4_sequence_res *res, | ||||||
| 			 struct rpc_task *task) | 			 struct rpc_task *task) | ||||||
| { | { | ||||||
| 	struct nfs4_slot_table *tbl = server->nfs_client->cl_slot_tbl; |  | ||||||
| 	struct nfs4_slot *slot; | 	struct nfs4_slot *slot; | ||||||
| 
 | 
 | ||||||
| 	/* slot already allocated? */ | 	/* slot already allocated? */ | ||||||
| @ -535,6 +534,7 @@ out_sleep: | |||||||
| 	spin_unlock(&tbl->slot_tbl_lock); | 	spin_unlock(&tbl->slot_tbl_lock); | ||||||
| 	return -EAGAIN; | 	return -EAGAIN; | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL_GPL(nfs40_setup_sequence); | ||||||
| 
 | 
 | ||||||
| static int nfs40_sequence_done(struct rpc_task *task, | static int nfs40_sequence_done(struct rpc_task *task, | ||||||
| 			       struct nfs4_sequence_res *res) | 			       struct nfs4_sequence_res *res) | ||||||
| @ -694,8 +694,7 @@ out_retry: | |||||||
| } | } | ||||||
| EXPORT_SYMBOL_GPL(nfs41_sequence_done); | EXPORT_SYMBOL_GPL(nfs41_sequence_done); | ||||||
| 
 | 
 | ||||||
| static int nfs4_sequence_done(struct rpc_task *task, | int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) | ||||||
| 			       struct nfs4_sequence_res *res) |  | ||||||
| { | { | ||||||
| 	if (res->sr_slot == NULL) | 	if (res->sr_slot == NULL) | ||||||
| 		return 1; | 		return 1; | ||||||
| @ -703,6 +702,7 @@ static int nfs4_sequence_done(struct rpc_task *task, | |||||||
| 		return nfs40_sequence_done(task, res); | 		return nfs40_sequence_done(task, res); | ||||||
| 	return nfs41_sequence_done(task, res); | 	return nfs41_sequence_done(task, res); | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL_GPL(nfs4_sequence_done); | ||||||
| 
 | 
 | ||||||
| int nfs41_setup_sequence(struct nfs4_session *session, | int nfs41_setup_sequence(struct nfs4_session *session, | ||||||
| 				struct nfs4_sequence_args *args, | 				struct nfs4_sequence_args *args, | ||||||
| @ -777,7 +777,8 @@ static int nfs4_setup_sequence(const struct nfs_server *server, | |||||||
| 	int ret = 0; | 	int ret = 0; | ||||||
| 
 | 
 | ||||||
| 	if (!session) | 	if (!session) | ||||||
| 		return nfs40_setup_sequence(server, args, res, task); | 		return nfs40_setup_sequence(server->nfs_client->cl_slot_tbl, | ||||||
|  | 					    args, res, task); | ||||||
| 
 | 
 | ||||||
| 	dprintk("--> %s clp %p session %p sr_slot %u\n", | 	dprintk("--> %s clp %p session %p sr_slot %u\n", | ||||||
| 		__func__, session->clp, session, res->sr_slot ? | 		__func__, session->clp, session, res->sr_slot ? | ||||||
| @ -818,14 +819,16 @@ static int nfs4_setup_sequence(const struct nfs_server *server, | |||||||
| 			       struct nfs4_sequence_res *res, | 			       struct nfs4_sequence_res *res, | ||||||
| 			       struct rpc_task *task) | 			       struct rpc_task *task) | ||||||
| { | { | ||||||
| 	return nfs40_setup_sequence(server, args, res, task); | 	return nfs40_setup_sequence(server->nfs_client->cl_slot_tbl, | ||||||
|  | 				    args, res, task); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int nfs4_sequence_done(struct rpc_task *task, | int nfs4_sequence_done(struct rpc_task *task, | ||||||
| 		       struct nfs4_sequence_res *res) | 		       struct nfs4_sequence_res *res) | ||||||
| { | { | ||||||
| 	return nfs40_sequence_done(task, res); | 	return nfs40_sequence_done(task, res); | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL_GPL(nfs4_sequence_done); | ||||||
| 
 | 
 | ||||||
| #endif	/* !CONFIG_NFS_V4_1 */ | #endif	/* !CONFIG_NFS_V4_1 */ | ||||||
| 
 | 
 | ||||||
| @ -1712,8 +1715,8 @@ static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata) | |||||||
| { | { | ||||||
| 	struct nfs4_opendata *data = calldata; | 	struct nfs4_opendata *data = calldata; | ||||||
| 
 | 
 | ||||||
| 	nfs40_setup_sequence(data->o_arg.server, &data->c_arg.seq_args, | 	nfs40_setup_sequence(data->o_arg.server->nfs_client->cl_slot_tbl, | ||||||
| 				&data->c_res.seq_res, task); | 			     &data->c_arg.seq_args, &data->c_res.seq_res, task); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) | static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) | ||||||
| @ -5994,8 +5997,8 @@ static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata | |||||||
| { | { | ||||||
| 	struct nfs_release_lockowner_data *data = calldata; | 	struct nfs_release_lockowner_data *data = calldata; | ||||||
| 	struct nfs_server *server = data->server; | 	struct nfs_server *server = data->server; | ||||||
| 	nfs40_setup_sequence(server, &data->args.seq_args, | 	nfs40_setup_sequence(server->nfs_client->cl_slot_tbl, | ||||||
| 				&data->res.seq_res, task); | 			     &data->args.seq_args, &data->res.seq_res, task); | ||||||
| 	data->args.lock_owner.clientid = server->nfs_client->cl_clientid; | 	data->args.lock_owner.clientid = server->nfs_client->cl_clientid; | ||||||
| 	data->timestamp = jiffies; | 	data->timestamp = jiffies; | ||||||
| } | } | ||||||
| @ -7557,6 +7560,7 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) | |||||||
| 		return; | 		return; | ||||||
| 	if (pnfs_choose_layoutget_stateid(&lgp->args.stateid, | 	if (pnfs_choose_layoutget_stateid(&lgp->args.stateid, | ||||||
| 					  NFS_I(lgp->args.inode)->layout, | 					  NFS_I(lgp->args.inode)->layout, | ||||||
|  | 					  &lgp->args.range, | ||||||
| 					  lgp->args.ctx->state)) { | 					  lgp->args.ctx->state)) { | ||||||
| 		rpc_exit(task, NFS4_OK); | 		rpc_exit(task, NFS4_OK); | ||||||
| 	} | 	} | ||||||
| @ -7812,6 +7816,9 @@ static void nfs4_layoutreturn_release(void *calldata) | |||||||
| 	spin_lock(&lo->plh_inode->i_lock); | 	spin_lock(&lo->plh_inode->i_lock); | ||||||
| 	if (lrp->res.lrs_present) | 	if (lrp->res.lrs_present) | ||||||
| 		pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); | 		pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); | ||||||
|  | 	pnfs_clear_layoutreturn_waitbit(lo); | ||||||
|  | 	clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags); | ||||||
|  | 	rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); | ||||||
| 	lo->plh_block_lgets--; | 	lo->plh_block_lgets--; | ||||||
| 	spin_unlock(&lo->plh_inode->i_lock); | 	spin_unlock(&lo->plh_inode->i_lock); | ||||||
| 	pnfs_put_layout_hdr(lrp->args.layout); | 	pnfs_put_layout_hdr(lrp->args.layout); | ||||||
| @ -7825,7 +7832,7 @@ static const struct rpc_call_ops nfs4_layoutreturn_call_ops = { | |||||||
| 	.rpc_release = nfs4_layoutreturn_release, | 	.rpc_release = nfs4_layoutreturn_release, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) | int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) | ||||||
| { | { | ||||||
| 	struct rpc_task *task; | 	struct rpc_task *task; | ||||||
| 	struct rpc_message msg = { | 	struct rpc_message msg = { | ||||||
| @ -7839,16 +7846,23 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) | |||||||
| 		.rpc_message = &msg, | 		.rpc_message = &msg, | ||||||
| 		.callback_ops = &nfs4_layoutreturn_call_ops, | 		.callback_ops = &nfs4_layoutreturn_call_ops, | ||||||
| 		.callback_data = lrp, | 		.callback_data = lrp, | ||||||
|  | 		.flags = RPC_TASK_ASYNC, | ||||||
| 	}; | 	}; | ||||||
| 	int status; | 	int status = 0; | ||||||
| 
 | 
 | ||||||
| 	dprintk("--> %s\n", __func__); | 	dprintk("--> %s\n", __func__); | ||||||
| 	nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1); | 	nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1); | ||||||
| 	task = rpc_run_task(&task_setup_data); | 	task = rpc_run_task(&task_setup_data); | ||||||
| 	if (IS_ERR(task)) | 	if (IS_ERR(task)) | ||||||
| 		return PTR_ERR(task); | 		return PTR_ERR(task); | ||||||
|  | 	if (sync == false) | ||||||
|  | 		goto out; | ||||||
|  | 	status = nfs4_wait_for_completion_rpc_task(task); | ||||||
|  | 	if (status != 0) | ||||||
|  | 		goto out; | ||||||
| 	status = task->tk_status; | 	status = task->tk_status; | ||||||
| 	trace_nfs4_layoutreturn(lrp->args.inode, status); | 	trace_nfs4_layoutreturn(lrp->args.inode, status); | ||||||
|  | out: | ||||||
| 	dprintk("<-- %s status=%d\n", __func__, status); | 	dprintk("<-- %s status=%d\n", __func__, status); | ||||||
| 	rpc_put_task(task); | 	rpc_put_task(task); | ||||||
| 	return status; | 	return status; | ||||||
|  | |||||||
| @ -346,6 +346,9 @@ out: | |||||||
| 
 | 
 | ||||||
| static void __exit exit_nfs_v4(void) | static void __exit exit_nfs_v4(void) | ||||||
| { | { | ||||||
|  | 	/* Not called in the _init(), conditionally loaded */ | ||||||
|  | 	nfs4_pnfs_v3_ds_connect_unload(); | ||||||
|  | 
 | ||||||
| 	unregister_nfs_version(&nfs_v4); | 	unregister_nfs_version(&nfs_v4); | ||||||
| 	nfs4_unregister_sysctl(); | 	nfs4_unregister_sysctl(); | ||||||
| 	nfs_idmap_quit(); | 	nfs_idmap_quit(); | ||||||
|  | |||||||
| @ -2011,11 +2011,11 @@ encode_layoutreturn(struct xdr_stream *xdr, | |||||||
| 	p = reserve_space(xdr, 16); | 	p = reserve_space(xdr, 16); | ||||||
| 	*p++ = cpu_to_be32(0);		/* reclaim. always 0 for now */ | 	*p++ = cpu_to_be32(0);		/* reclaim. always 0 for now */ | ||||||
| 	*p++ = cpu_to_be32(args->layout_type); | 	*p++ = cpu_to_be32(args->layout_type); | ||||||
| 	*p++ = cpu_to_be32(IOMODE_ANY); | 	*p++ = cpu_to_be32(args->range.iomode); | ||||||
| 	*p = cpu_to_be32(RETURN_FILE); | 	*p = cpu_to_be32(RETURN_FILE); | ||||||
| 	p = reserve_space(xdr, 16); | 	p = reserve_space(xdr, 16); | ||||||
| 	p = xdr_encode_hyper(p, 0); | 	p = xdr_encode_hyper(p, args->range.offset); | ||||||
| 	p = xdr_encode_hyper(p, NFS4_MAX_UINT64); | 	p = xdr_encode_hyper(p, args->range.length); | ||||||
| 	spin_lock(&args->inode->i_lock); | 	spin_lock(&args->inode->i_lock); | ||||||
| 	encode_nfs4_stateid(xdr, &args->stateid); | 	encode_nfs4_stateid(xdr, &args->stateid); | ||||||
| 	spin_unlock(&args->inode->i_lock); | 	spin_unlock(&args->inode->i_lock); | ||||||
| @ -6566,6 +6566,7 @@ static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||||||
| 	int status; | 	int status; | ||||||
| 
 | 
 | ||||||
| 	status = decode_compound_hdr(xdr, &hdr); | 	status = decode_compound_hdr(xdr, &hdr); | ||||||
|  | 	res->op_status = hdr.status; | ||||||
| 	if (status) | 	if (status) | ||||||
| 		goto out; | 		goto out; | ||||||
| 	status = decode_sequence(xdr, &res->seq_res, rqstp); | 	status = decode_sequence(xdr, &res->seq_res, rqstp); | ||||||
| @ -6591,6 +6592,7 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||||||
| 	int status; | 	int status; | ||||||
| 
 | 
 | ||||||
| 	status = decode_compound_hdr(xdr, &hdr); | 	status = decode_compound_hdr(xdr, &hdr); | ||||||
|  | 	res->op_status = hdr.status; | ||||||
| 	if (status) | 	if (status) | ||||||
| 		goto out; | 		goto out; | ||||||
| 	status = decode_sequence(xdr, &res->seq_res, rqstp); | 	status = decode_sequence(xdr, &res->seq_res, rqstp); | ||||||
| @ -6620,6 +6622,7 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||||||
| 	int status; | 	int status; | ||||||
| 
 | 
 | ||||||
| 	status = decode_compound_hdr(xdr, &hdr); | 	status = decode_compound_hdr(xdr, &hdr); | ||||||
|  | 	res->op_status = hdr.status; | ||||||
| 	if (status) | 	if (status) | ||||||
| 		goto out; | 		goto out; | ||||||
| 	status = decode_sequence(xdr, &res->seq_res, rqstp); | 	status = decode_sequence(xdr, &res->seq_res, rqstp); | ||||||
|  | |||||||
| @ -537,11 +537,12 @@ int objio_write_pagelist(struct nfs_pgio_header *hdr, int how) | |||||||
| static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio, | static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio, | ||||||
| 			  struct nfs_page *prev, struct nfs_page *req) | 			  struct nfs_page *prev, struct nfs_page *req) | ||||||
| { | { | ||||||
|  | 	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(pgio); | ||||||
| 	unsigned int size; | 	unsigned int size; | ||||||
| 
 | 
 | ||||||
| 	size = pnfs_generic_pg_test(pgio, prev, req); | 	size = pnfs_generic_pg_test(pgio, prev, req); | ||||||
| 
 | 
 | ||||||
| 	if (!size || pgio->pg_count + req->wb_bytes > | 	if (!size || mirror->pg_count + req->wb_bytes > | ||||||
| 	    (unsigned long)pgio->pg_layout_private) | 	    (unsigned long)pgio->pg_layout_private) | ||||||
| 		return 0; | 		return 0; | ||||||
| 
 | 
 | ||||||
| @ -607,12 +608,14 @@ static const struct nfs_pageio_ops objio_pg_read_ops = { | |||||||
| 	.pg_init = objio_init_read, | 	.pg_init = objio_init_read, | ||||||
| 	.pg_test = objio_pg_test, | 	.pg_test = objio_pg_test, | ||||||
| 	.pg_doio = pnfs_generic_pg_readpages, | 	.pg_doio = pnfs_generic_pg_readpages, | ||||||
|  | 	.pg_cleanup = pnfs_generic_pg_cleanup, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static const struct nfs_pageio_ops objio_pg_write_ops = { | static const struct nfs_pageio_ops objio_pg_write_ops = { | ||||||
| 	.pg_init = objio_init_write, | 	.pg_init = objio_init_write, | ||||||
| 	.pg_test = objio_pg_test, | 	.pg_test = objio_pg_test, | ||||||
| 	.pg_doio = pnfs_generic_pg_writepages, | 	.pg_doio = pnfs_generic_pg_writepages, | ||||||
|  | 	.pg_cleanup = pnfs_generic_pg_cleanup, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static struct pnfs_layoutdriver_type objlayout_type = { | static struct pnfs_layoutdriver_type objlayout_type = { | ||||||
|  | |||||||
| @ -42,21 +42,35 @@ static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) | |||||||
| 	return p->pagevec != NULL; | 	return p->pagevec != NULL; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | struct nfs_pgio_mirror * | ||||||
|  | nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc) | ||||||
|  | { | ||||||
|  | 	return nfs_pgio_has_mirroring(desc) ? | ||||||
|  | 		&desc->pg_mirrors[desc->pg_mirror_idx] : | ||||||
|  | 		&desc->pg_mirrors[0]; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL_GPL(nfs_pgio_current_mirror); | ||||||
|  | 
 | ||||||
| void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, | void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, | ||||||
| 		       struct nfs_pgio_header *hdr, | 		       struct nfs_pgio_header *hdr, | ||||||
| 		       void (*release)(struct nfs_pgio_header *hdr)) | 		       void (*release)(struct nfs_pgio_header *hdr)) | ||||||
| { | { | ||||||
| 	hdr->req = nfs_list_entry(desc->pg_list.next); | 	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 	hdr->req = nfs_list_entry(mirror->pg_list.next); | ||||||
| 	hdr->inode = desc->pg_inode; | 	hdr->inode = desc->pg_inode; | ||||||
| 	hdr->cred = hdr->req->wb_context->cred; | 	hdr->cred = hdr->req->wb_context->cred; | ||||||
| 	hdr->io_start = req_offset(hdr->req); | 	hdr->io_start = req_offset(hdr->req); | ||||||
| 	hdr->good_bytes = desc->pg_count; | 	hdr->good_bytes = mirror->pg_count; | ||||||
| 	hdr->dreq = desc->pg_dreq; | 	hdr->dreq = desc->pg_dreq; | ||||||
| 	hdr->layout_private = desc->pg_layout_private; | 	hdr->layout_private = desc->pg_layout_private; | ||||||
| 	hdr->release = release; | 	hdr->release = release; | ||||||
| 	hdr->completion_ops = desc->pg_completion_ops; | 	hdr->completion_ops = desc->pg_completion_ops; | ||||||
| 	if (hdr->completion_ops->init_hdr) | 	if (hdr->completion_ops->init_hdr) | ||||||
| 		hdr->completion_ops->init_hdr(hdr); | 		hdr->completion_ops->init_hdr(hdr); | ||||||
|  | 
 | ||||||
|  | 	hdr->pgio_mirror_idx = desc->pg_mirror_idx; | ||||||
| } | } | ||||||
| EXPORT_SYMBOL_GPL(nfs_pgheader_init); | EXPORT_SYMBOL_GPL(nfs_pgheader_init); | ||||||
| 
 | 
 | ||||||
| @ -480,7 +494,10 @@ nfs_wait_on_request(struct nfs_page *req) | |||||||
| size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, | size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, | ||||||
| 			   struct nfs_page *prev, struct nfs_page *req) | 			   struct nfs_page *prev, struct nfs_page *req) | ||||||
| { | { | ||||||
| 	if (desc->pg_count > desc->pg_bsize) { | 	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 	if (mirror->pg_count > mirror->pg_bsize) { | ||||||
| 		/* should never happen */ | 		/* should never happen */ | ||||||
| 		WARN_ON_ONCE(1); | 		WARN_ON_ONCE(1); | ||||||
| 		return 0; | 		return 0; | ||||||
| @ -490,11 +507,11 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, | |||||||
| 	 * Limit the request size so that we can still allocate a page array | 	 * Limit the request size so that we can still allocate a page array | ||||||
| 	 * for it without upsetting the slab allocator. | 	 * for it without upsetting the slab allocator. | ||||||
| 	 */ | 	 */ | ||||||
| 	if (((desc->pg_count + req->wb_bytes) >> PAGE_SHIFT) * | 	if (((mirror->pg_count + req->wb_bytes) >> PAGE_SHIFT) * | ||||||
| 			sizeof(struct page) > PAGE_SIZE) | 			sizeof(struct page) > PAGE_SIZE) | ||||||
| 		return 0; | 		return 0; | ||||||
| 
 | 
 | ||||||
| 	return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes); | 	return min(mirror->pg_bsize - mirror->pg_count, (size_t)req->wb_bytes); | ||||||
| } | } | ||||||
| EXPORT_SYMBOL_GPL(nfs_generic_pg_test); | EXPORT_SYMBOL_GPL(nfs_generic_pg_test); | ||||||
| 
 | 
 | ||||||
| @ -597,13 +614,14 @@ static void nfs_pgio_prepare(struct rpc_task *task, void *calldata) | |||||||
| } | } | ||||||
| 
 | 
 | ||||||
| int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, | int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, | ||||||
|  | 		      struct rpc_cred *cred, const struct nfs_rpc_ops *rpc_ops, | ||||||
| 		      const struct rpc_call_ops *call_ops, int how, int flags) | 		      const struct rpc_call_ops *call_ops, int how, int flags) | ||||||
| { | { | ||||||
| 	struct rpc_task *task; | 	struct rpc_task *task; | ||||||
| 	struct rpc_message msg = { | 	struct rpc_message msg = { | ||||||
| 		.rpc_argp = &hdr->args, | 		.rpc_argp = &hdr->args, | ||||||
| 		.rpc_resp = &hdr->res, | 		.rpc_resp = &hdr->res, | ||||||
| 		.rpc_cred = hdr->cred, | 		.rpc_cred = cred, | ||||||
| 	}; | 	}; | ||||||
| 	struct rpc_task_setup task_setup_data = { | 	struct rpc_task_setup task_setup_data = { | ||||||
| 		.rpc_client = clnt, | 		.rpc_client = clnt, | ||||||
| @ -616,7 +634,7 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, | |||||||
| 	}; | 	}; | ||||||
| 	int ret = 0; | 	int ret = 0; | ||||||
| 
 | 
 | ||||||
| 	hdr->rw_ops->rw_initiate(hdr, &msg, &task_setup_data, how); | 	hdr->rw_ops->rw_initiate(hdr, &msg, rpc_ops, &task_setup_data, how); | ||||||
| 
 | 
 | ||||||
| 	dprintk("NFS: %5u initiated pgio call " | 	dprintk("NFS: %5u initiated pgio call " | ||||||
| 		"(req %s/%llu, %u bytes @ offset %llu)\n", | 		"(req %s/%llu, %u bytes @ offset %llu)\n", | ||||||
| @ -650,10 +668,18 @@ EXPORT_SYMBOL_GPL(nfs_initiate_pgio); | |||||||
| static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, | static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, | ||||||
| 			  struct nfs_pgio_header *hdr) | 			  struct nfs_pgio_header *hdr) | ||||||
| { | { | ||||||
|  | 	struct nfs_pgio_mirror *mirror; | ||||||
|  | 	u32 midx; | ||||||
|  | 
 | ||||||
| 	set_bit(NFS_IOHDR_REDO, &hdr->flags); | 	set_bit(NFS_IOHDR_REDO, &hdr->flags); | ||||||
| 	nfs_pgio_data_destroy(hdr); | 	nfs_pgio_data_destroy(hdr); | ||||||
| 	hdr->completion_ops->completion(hdr); | 	hdr->completion_ops->completion(hdr); | ||||||
| 	desc->pg_completion_ops->error_cleanup(&desc->pg_list); | 	/* TODO: Make sure it's right to clean up all mirrors here
 | ||||||
|  | 	 *       and not just hdr->pgio_mirror_idx */ | ||||||
|  | 	for (midx = 0; midx < desc->pg_mirror_count; midx++) { | ||||||
|  | 		mirror = &desc->pg_mirrors[midx]; | ||||||
|  | 		desc->pg_completion_ops->error_cleanup(&mirror->pg_list); | ||||||
|  | 	} | ||||||
| 	return -ENOMEM; | 	return -ENOMEM; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -670,6 +696,17 @@ static void nfs_pgio_release(void *calldata) | |||||||
| 	hdr->completion_ops->completion(hdr); | 	hdr->completion_ops->completion(hdr); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static void nfs_pageio_mirror_init(struct nfs_pgio_mirror *mirror, | ||||||
|  | 				   unsigned int bsize) | ||||||
|  | { | ||||||
|  | 	INIT_LIST_HEAD(&mirror->pg_list); | ||||||
|  | 	mirror->pg_bytes_written = 0; | ||||||
|  | 	mirror->pg_count = 0; | ||||||
|  | 	mirror->pg_bsize = bsize; | ||||||
|  | 	mirror->pg_base = 0; | ||||||
|  | 	mirror->pg_recoalesce = 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /**
 | /**
 | ||||||
|  * nfs_pageio_init - initialise a page io descriptor |  * nfs_pageio_init - initialise a page io descriptor | ||||||
|  * @desc: pointer to descriptor |  * @desc: pointer to descriptor | ||||||
| @ -686,13 +723,10 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, | |||||||
| 		     size_t bsize, | 		     size_t bsize, | ||||||
| 		     int io_flags) | 		     int io_flags) | ||||||
| { | { | ||||||
| 	INIT_LIST_HEAD(&desc->pg_list); | 	struct nfs_pgio_mirror *new; | ||||||
| 	desc->pg_bytes_written = 0; | 	int i; | ||||||
| 	desc->pg_count = 0; | 
 | ||||||
| 	desc->pg_bsize = bsize; |  | ||||||
| 	desc->pg_base = 0; |  | ||||||
| 	desc->pg_moreio = 0; | 	desc->pg_moreio = 0; | ||||||
| 	desc->pg_recoalesce = 0; |  | ||||||
| 	desc->pg_inode = inode; | 	desc->pg_inode = inode; | ||||||
| 	desc->pg_ops = pg_ops; | 	desc->pg_ops = pg_ops; | ||||||
| 	desc->pg_completion_ops = compl_ops; | 	desc->pg_completion_ops = compl_ops; | ||||||
| @ -702,6 +736,26 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, | |||||||
| 	desc->pg_lseg = NULL; | 	desc->pg_lseg = NULL; | ||||||
| 	desc->pg_dreq = NULL; | 	desc->pg_dreq = NULL; | ||||||
| 	desc->pg_layout_private = NULL; | 	desc->pg_layout_private = NULL; | ||||||
|  | 	desc->pg_bsize = bsize; | ||||||
|  | 
 | ||||||
|  | 	desc->pg_mirror_count = 1; | ||||||
|  | 	desc->pg_mirror_idx = 0; | ||||||
|  | 
 | ||||||
|  | 	if (pg_ops->pg_get_mirror_count) { | ||||||
|  | 		/* until we have a request, we don't have an lseg and no
 | ||||||
|  | 		 * idea how many mirrors there will be */ | ||||||
|  | 		new = kcalloc(NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX, | ||||||
|  | 			      sizeof(struct nfs_pgio_mirror), GFP_KERNEL); | ||||||
|  | 		desc->pg_mirrors_dynamic = new; | ||||||
|  | 		desc->pg_mirrors = new; | ||||||
|  | 
 | ||||||
|  | 		for (i = 0; i < NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX; i++) | ||||||
|  | 			nfs_pageio_mirror_init(&desc->pg_mirrors[i], bsize); | ||||||
|  | 	} else { | ||||||
|  | 		desc->pg_mirrors_dynamic = NULL; | ||||||
|  | 		desc->pg_mirrors = desc->pg_mirrors_static; | ||||||
|  | 		nfs_pageio_mirror_init(&desc->pg_mirrors[0], bsize); | ||||||
|  | 	} | ||||||
| } | } | ||||||
| EXPORT_SYMBOL_GPL(nfs_pageio_init); | EXPORT_SYMBOL_GPL(nfs_pageio_init); | ||||||
| 
 | 
 | ||||||
| @ -737,14 +791,16 @@ static void nfs_pgio_result(struct rpc_task *task, void *calldata) | |||||||
| int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, | int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, | ||||||
| 		     struct nfs_pgio_header *hdr) | 		     struct nfs_pgio_header *hdr) | ||||||
| { | { | ||||||
|  | 	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); | ||||||
|  | 
 | ||||||
| 	struct nfs_page		*req; | 	struct nfs_page		*req; | ||||||
| 	struct page		**pages, | 	struct page		**pages, | ||||||
| 				*last_page; | 				*last_page; | ||||||
| 	struct list_head *head = &desc->pg_list; | 	struct list_head *head = &mirror->pg_list; | ||||||
| 	struct nfs_commit_info cinfo; | 	struct nfs_commit_info cinfo; | ||||||
| 	unsigned int pagecount, pageused; | 	unsigned int pagecount, pageused; | ||||||
| 
 | 
 | ||||||
| 	pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count); | 	pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count); | ||||||
| 	if (!nfs_pgarray_set(&hdr->page_array, pagecount)) | 	if (!nfs_pgarray_set(&hdr->page_array, pagecount)) | ||||||
| 		return nfs_pgio_error(desc, hdr); | 		return nfs_pgio_error(desc, hdr); | ||||||
| 
 | 
 | ||||||
| @ -772,7 +828,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, | |||||||
| 		desc->pg_ioflags &= ~FLUSH_COND_STABLE; | 		desc->pg_ioflags &= ~FLUSH_COND_STABLE; | ||||||
| 
 | 
 | ||||||
| 	/* Set up the argument struct */ | 	/* Set up the argument struct */ | ||||||
| 	nfs_pgio_rpcsetup(hdr, desc->pg_count, 0, desc->pg_ioflags, &cinfo); | 	nfs_pgio_rpcsetup(hdr, mirror->pg_count, 0, desc->pg_ioflags, &cinfo); | ||||||
| 	desc->pg_rpc_callops = &nfs_pgio_common_ops; | 	desc->pg_rpc_callops = &nfs_pgio_common_ops; | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| @ -780,23 +836,74 @@ EXPORT_SYMBOL_GPL(nfs_generic_pgio); | |||||||
| 
 | 
 | ||||||
| static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) | static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) | ||||||
| { | { | ||||||
|  | 	struct nfs_pgio_mirror *mirror; | ||||||
| 	struct nfs_pgio_header *hdr; | 	struct nfs_pgio_header *hdr; | ||||||
| 	int ret; | 	int ret; | ||||||
| 
 | 
 | ||||||
|  | 	mirror = nfs_pgio_current_mirror(desc); | ||||||
|  | 
 | ||||||
| 	hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); | 	hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); | ||||||
| 	if (!hdr) { | 	if (!hdr) { | ||||||
| 		desc->pg_completion_ops->error_cleanup(&desc->pg_list); | 		/* TODO: make sure this is right with mirroring - or
 | ||||||
|  | 		 *       should it back out all mirrors? */ | ||||||
|  | 		desc->pg_completion_ops->error_cleanup(&mirror->pg_list); | ||||||
| 		return -ENOMEM; | 		return -ENOMEM; | ||||||
| 	} | 	} | ||||||
| 	nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); | 	nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); | ||||||
| 	ret = nfs_generic_pgio(desc, hdr); | 	ret = nfs_generic_pgio(desc, hdr); | ||||||
| 	if (ret == 0) | 	if (ret == 0) | ||||||
| 		ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), | 		ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), | ||||||
| 					hdr, desc->pg_rpc_callops, | 					hdr, | ||||||
|  | 					hdr->cred, | ||||||
|  | 					NFS_PROTO(hdr->inode), | ||||||
|  | 					desc->pg_rpc_callops, | ||||||
| 					desc->pg_ioflags, 0); | 					desc->pg_ioflags, 0); | ||||||
| 	return ret; | 	return ret; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /*
 | ||||||
|  |  * nfs_pageio_setup_mirroring - determine if mirroring is to be used | ||||||
|  |  *				by calling the pg_get_mirror_count op | ||||||
|  |  */ | ||||||
|  | static int nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio, | ||||||
|  | 				       struct nfs_page *req) | ||||||
|  | { | ||||||
|  | 	int mirror_count = 1; | ||||||
|  | 
 | ||||||
|  | 	if (!pgio->pg_ops->pg_get_mirror_count) | ||||||
|  | 		return 0; | ||||||
|  | 
 | ||||||
|  | 	mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); | ||||||
|  | 
 | ||||||
|  | 	if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX) | ||||||
|  | 		return -EINVAL; | ||||||
|  | 
 | ||||||
|  | 	if (WARN_ON_ONCE(!pgio->pg_mirrors_dynamic)) | ||||||
|  | 		return -EINVAL; | ||||||
|  | 
 | ||||||
|  | 	pgio->pg_mirror_count = mirror_count; | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * nfs_pageio_stop_mirroring - stop using mirroring (set mirror count to 1) | ||||||
|  |  */ | ||||||
|  | void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio) | ||||||
|  | { | ||||||
|  | 	pgio->pg_mirror_count = 1; | ||||||
|  | 	pgio->pg_mirror_idx = 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio) | ||||||
|  | { | ||||||
|  | 	pgio->pg_mirror_count = 1; | ||||||
|  | 	pgio->pg_mirror_idx = 0; | ||||||
|  | 	pgio->pg_mirrors = pgio->pg_mirrors_static; | ||||||
|  | 	kfree(pgio->pg_mirrors_dynamic); | ||||||
|  | 	pgio->pg_mirrors_dynamic = NULL; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static bool nfs_match_open_context(const struct nfs_open_context *ctx1, | static bool nfs_match_open_context(const struct nfs_open_context *ctx1, | ||||||
| 		const struct nfs_open_context *ctx2) | 		const struct nfs_open_context *ctx2) | ||||||
| { | { | ||||||
| @ -863,19 +970,22 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, | |||||||
| static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, | static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, | ||||||
| 				     struct nfs_page *req) | 				     struct nfs_page *req) | ||||||
| { | { | ||||||
|  | 	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); | ||||||
|  | 
 | ||||||
| 	struct nfs_page *prev = NULL; | 	struct nfs_page *prev = NULL; | ||||||
| 	if (desc->pg_count != 0) { | 
 | ||||||
| 		prev = nfs_list_entry(desc->pg_list.prev); | 	if (mirror->pg_count != 0) { | ||||||
|  | 		prev = nfs_list_entry(mirror->pg_list.prev); | ||||||
| 	} else { | 	} else { | ||||||
| 		if (desc->pg_ops->pg_init) | 		if (desc->pg_ops->pg_init) | ||||||
| 			desc->pg_ops->pg_init(desc, req); | 			desc->pg_ops->pg_init(desc, req); | ||||||
| 		desc->pg_base = req->wb_pgbase; | 		mirror->pg_base = req->wb_pgbase; | ||||||
| 	} | 	} | ||||||
| 	if (!nfs_can_coalesce_requests(prev, req, desc)) | 	if (!nfs_can_coalesce_requests(prev, req, desc)) | ||||||
| 		return 0; | 		return 0; | ||||||
| 	nfs_list_remove_request(req); | 	nfs_list_remove_request(req); | ||||||
| 	nfs_list_add_request(req, &desc->pg_list); | 	nfs_list_add_request(req, &mirror->pg_list); | ||||||
| 	desc->pg_count += req->wb_bytes; | 	mirror->pg_count += req->wb_bytes; | ||||||
| 	return 1; | 	return 1; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -884,16 +994,19 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, | |||||||
|  */ |  */ | ||||||
| static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) | static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) | ||||||
| { | { | ||||||
| 	if (!list_empty(&desc->pg_list)) { | 	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 	if (!list_empty(&mirror->pg_list)) { | ||||||
| 		int error = desc->pg_ops->pg_doio(desc); | 		int error = desc->pg_ops->pg_doio(desc); | ||||||
| 		if (error < 0) | 		if (error < 0) | ||||||
| 			desc->pg_error = error; | 			desc->pg_error = error; | ||||||
| 		else | 		else | ||||||
| 			desc->pg_bytes_written += desc->pg_count; | 			mirror->pg_bytes_written += mirror->pg_count; | ||||||
| 	} | 	} | ||||||
| 	if (list_empty(&desc->pg_list)) { | 	if (list_empty(&mirror->pg_list)) { | ||||||
| 		desc->pg_count = 0; | 		mirror->pg_count = 0; | ||||||
| 		desc->pg_base = 0; | 		mirror->pg_base = 0; | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -911,6 +1024,8 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) | |||||||
| static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | ||||||
| 			   struct nfs_page *req) | 			   struct nfs_page *req) | ||||||
| { | { | ||||||
|  | 	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); | ||||||
|  | 
 | ||||||
| 	struct nfs_page *subreq; | 	struct nfs_page *subreq; | ||||||
| 	unsigned int bytes_left = 0; | 	unsigned int bytes_left = 0; | ||||||
| 	unsigned int offset, pgbase; | 	unsigned int offset, pgbase; | ||||||
| @ -934,7 +1049,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | |||||||
| 			nfs_pageio_doio(desc); | 			nfs_pageio_doio(desc); | ||||||
| 			if (desc->pg_error < 0) | 			if (desc->pg_error < 0) | ||||||
| 				return 0; | 				return 0; | ||||||
| 			if (desc->pg_recoalesce) | 			if (mirror->pg_recoalesce) | ||||||
| 				return 0; | 				return 0; | ||||||
| 			/* retry add_request for this subreq */ | 			/* retry add_request for this subreq */ | ||||||
| 			nfs_page_group_lock(req, false); | 			nfs_page_group_lock(req, false); | ||||||
| @ -972,14 +1087,16 @@ err_ptr: | |||||||
| 
 | 
 | ||||||
| static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) | static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) | ||||||
| { | { | ||||||
|  | 	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); | ||||||
| 	LIST_HEAD(head); | 	LIST_HEAD(head); | ||||||
| 
 | 
 | ||||||
| 	do { | 	do { | ||||||
| 		list_splice_init(&desc->pg_list, &head); | 		list_splice_init(&mirror->pg_list, &head); | ||||||
| 		desc->pg_bytes_written -= desc->pg_count; | 		mirror->pg_bytes_written -= mirror->pg_count; | ||||||
| 		desc->pg_count = 0; | 		mirror->pg_count = 0; | ||||||
| 		desc->pg_base = 0; | 		mirror->pg_base = 0; | ||||||
| 		desc->pg_recoalesce = 0; | 		mirror->pg_recoalesce = 0; | ||||||
|  | 
 | ||||||
| 		desc->pg_moreio = 0; | 		desc->pg_moreio = 0; | ||||||
| 
 | 
 | ||||||
| 		while (!list_empty(&head)) { | 		while (!list_empty(&head)) { | ||||||
| @ -993,11 +1110,11 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) | |||||||
| 				return 0; | 				return 0; | ||||||
| 			break; | 			break; | ||||||
| 		} | 		} | ||||||
| 	} while (desc->pg_recoalesce); | 	} while (mirror->pg_recoalesce); | ||||||
| 	return 1; | 	return 1; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | static int nfs_pageio_add_request_mirror(struct nfs_pageio_descriptor *desc, | ||||||
| 		struct nfs_page *req) | 		struct nfs_page *req) | ||||||
| { | { | ||||||
| 	int ret; | 	int ret; | ||||||
| @ -1010,9 +1127,80 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | |||||||
| 			break; | 			break; | ||||||
| 		ret = nfs_do_recoalesce(desc); | 		ret = nfs_do_recoalesce(desc); | ||||||
| 	} while (ret); | 	} while (ret); | ||||||
|  | 
 | ||||||
| 	return ret; | 	return ret; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | ||||||
|  | 			   struct nfs_page *req) | ||||||
|  | { | ||||||
|  | 	u32 midx; | ||||||
|  | 	unsigned int pgbase, offset, bytes; | ||||||
|  | 	struct nfs_page *dupreq, *lastreq; | ||||||
|  | 
 | ||||||
|  | 	pgbase = req->wb_pgbase; | ||||||
|  | 	offset = req->wb_offset; | ||||||
|  | 	bytes = req->wb_bytes; | ||||||
|  | 
 | ||||||
|  | 	nfs_pageio_setup_mirroring(desc, req); | ||||||
|  | 
 | ||||||
|  | 	for (midx = 0; midx < desc->pg_mirror_count; midx++) { | ||||||
|  | 		if (midx) { | ||||||
|  | 			nfs_page_group_lock(req, false); | ||||||
|  | 
 | ||||||
|  | 			/* find the last request */ | ||||||
|  | 			for (lastreq = req->wb_head; | ||||||
|  | 			     lastreq->wb_this_page != req->wb_head; | ||||||
|  | 			     lastreq = lastreq->wb_this_page) | ||||||
|  | 				; | ||||||
|  | 
 | ||||||
|  | 			dupreq = nfs_create_request(req->wb_context, | ||||||
|  | 					req->wb_page, lastreq, pgbase, bytes); | ||||||
|  | 
 | ||||||
|  | 			if (IS_ERR(dupreq)) { | ||||||
|  | 				nfs_page_group_unlock(req); | ||||||
|  | 				return 0; | ||||||
|  | 			} | ||||||
|  | 
 | ||||||
|  | 			nfs_lock_request(dupreq); | ||||||
|  | 			nfs_page_group_unlock(req); | ||||||
|  | 			dupreq->wb_offset = offset; | ||||||
|  | 			dupreq->wb_index = req->wb_index; | ||||||
|  | 		} else | ||||||
|  | 			dupreq = req; | ||||||
|  | 
 | ||||||
|  | 		if (nfs_pgio_has_mirroring(desc)) | ||||||
|  | 			desc->pg_mirror_idx = midx; | ||||||
|  | 		if (!nfs_pageio_add_request_mirror(desc, dupreq)) | ||||||
|  | 			return 0; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return 1; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * nfs_pageio_complete_mirror - Complete I/O on one mirror of an | ||||||
|  |  *				nfs_pageio_descriptor | ||||||
|  |  * @desc: pointer to io descriptor | ||||||
|  |  * @mirror_idx: index into desc->pg_mirrors of the mirror to complete | ||||||
|  |  * | ||||||
|  |  * desc->pg_mirror_idx is saved on entry and put back before returning. | ||||||
|  |  */ | ||||||
|  | static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc, | ||||||
|  | 				       u32 mirror_idx) | ||||||
|  | { | ||||||
|  | 	struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[mirror_idx]; | ||||||
|  | 	u32 restore_idx = desc->pg_mirror_idx; /* put back before returning */ | ||||||
|  | 
 | ||||||
|  | 	if (nfs_pgio_has_mirroring(desc)) | ||||||
|  | 		desc->pg_mirror_idx = mirror_idx; | ||||||
|  | 	for (;;) { /* repeat doio while the mirror still asks to recoalesce */ | ||||||
|  | 		nfs_pageio_doio(desc); | ||||||
|  | 		if (!mirror->pg_recoalesce) | ||||||
|  | 			break; | ||||||
|  | 		if (!nfs_do_recoalesce(desc)) | ||||||
|  | 			break; | ||||||
|  | 	} | ||||||
|  | 	desc->pg_mirror_idx = restore_idx; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * nfs_pageio_resend - Transfer requests to new descriptor and resend |  * nfs_pageio_resend - Transfer requests to new descriptor and resend | ||||||
|  * @hdr - the pgio header to move request from |  * @hdr - the pgio header to move request from | ||||||
| @ -1046,18 +1234,19 @@ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc, | |||||||
| EXPORT_SYMBOL_GPL(nfs_pageio_resend); | EXPORT_SYMBOL_GPL(nfs_pageio_resend); | ||||||
| 
 | 
 | ||||||
| /**
 | /**
 | ||||||
|  * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor |  * nfs_pageio_complete - Complete I/O then cleanup an nfs_pageio_descriptor | ||||||
|  * @desc: pointer to io descriptor |  * @desc: pointer to io descriptor | ||||||
|  */ |  */ | ||||||
| void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) | void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) | ||||||
| { | { | ||||||
| 	for (;;) { | 	u32 midx; | ||||||
| 		nfs_pageio_doio(desc); | 
 | ||||||
| 		if (!desc->pg_recoalesce) | 	for (midx = 0; midx < desc->pg_mirror_count; midx++) | ||||||
| 			break; | 		nfs_pageio_complete_mirror(desc, midx); | ||||||
| 		if (!nfs_do_recoalesce(desc)) | 
 | ||||||
| 			break; | 	if (desc->pg_ops->pg_cleanup) | ||||||
| 	} | 		desc->pg_ops->pg_cleanup(desc); | ||||||
|  | 	nfs_pageio_cleanup_mirroring(desc); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /**
 | /**
 | ||||||
| @ -1073,10 +1262,17 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) | |||||||
|  */ |  */ | ||||||
| void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) | void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) | ||||||
| { | { | ||||||
| 	if (!list_empty(&desc->pg_list)) { | 	struct nfs_pgio_mirror *mirror; | ||||||
| 		struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev); | 	struct nfs_page *prev; | ||||||
|  | 	u32 midx; | ||||||
|  | 
 | ||||||
|  | 	for (midx = 0; midx < desc->pg_mirror_count; midx++) { | ||||||
|  | 		mirror = &desc->pg_mirrors[midx]; | ||||||
|  | 		if (!list_empty(&mirror->pg_list)) { | ||||||
|  | 			prev = nfs_list_entry(mirror->pg_list.prev); | ||||||
| 			if (index != prev->wb_index + 1) | 			if (index != prev->wb_index + 1) | ||||||
| 			nfs_pageio_complete(desc); | 				nfs_pageio_complete_mirror(desc, midx); | ||||||
|  | 		} | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | |||||||
							
								
								
									
										421
									
								
								fs/nfs/pnfs.c
									
									
									
									
									
								
							
							
						
						
									
										421
									
								
								fs/nfs/pnfs.c
									
									
									
									
									
								
							| @ -51,6 +51,10 @@ static DEFINE_SPINLOCK(pnfs_spinlock); | |||||||
|  */ |  */ | ||||||
| static LIST_HEAD(pnfs_modules_tbl); | static LIST_HEAD(pnfs_modules_tbl); | ||||||
| 
 | 
 | ||||||
|  | static int | ||||||
|  | pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, | ||||||
|  | 		       enum pnfs_iomode iomode, bool sync); | ||||||
|  | 
 | ||||||
| /* Return the registered pnfs layout driver module matching given id */ | /* Return the registered pnfs layout driver module matching given id */ | ||||||
| static struct pnfs_layoutdriver_type * | static struct pnfs_layoutdriver_type * | ||||||
| find_pnfs_driver_locked(u32 id) | find_pnfs_driver_locked(u32 id) | ||||||
| @ -239,6 +243,8 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) | |||||||
| 	struct inode *inode = lo->plh_inode; | 	struct inode *inode = lo->plh_inode; | ||||||
| 
 | 
 | ||||||
| 	if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { | 	if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { | ||||||
|  | 		if (!list_empty(&lo->plh_segs)) | ||||||
|  | 			WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n"); | ||||||
| 		pnfs_detach_layout_hdr(lo); | 		pnfs_detach_layout_hdr(lo); | ||||||
| 		spin_unlock(&inode->i_lock); | 		spin_unlock(&inode->i_lock); | ||||||
| 		pnfs_free_layout_hdr(lo); | 		pnfs_free_layout_hdr(lo); | ||||||
| @ -338,6 +344,65 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, | |||||||
| 	rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); | 	rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /* Return true if layoutreturn is needed: @lseg must carry | ||||||
|  |  * NFS_LSEG_LAYOUTRETURN and no other segment on lo->plh_segs may still | ||||||
|  |  * carry it, so only the last flagged segment yields true. | ||||||
|  |  * The callers visible in this file invoke it with inode->i_lock held. | ||||||
|  |  */ | ||||||
|  | static bool | ||||||
|  | pnfs_layout_need_return(struct pnfs_layout_hdr *lo, | ||||||
|  | 			struct pnfs_layout_segment *lseg) | ||||||
|  | { | ||||||
|  | 	struct pnfs_layout_segment *s; | ||||||
|  | 
 | ||||||
|  | 	if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) | ||||||
|  | 		return false; | ||||||
|  | 
 | ||||||
|  | 	list_for_each_entry(s, &lo->plh_segs, pls_list) | ||||||
|  | 		if (s != lseg && test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags)) | ||||||
|  | 			return false; /* another flagged segment is still listed */ | ||||||
|  | 
 | ||||||
|  | 	return true; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void pnfs_layoutreturn_free_lseg(struct work_struct *work) | ||||||
|  | { | ||||||
|  | 	struct pnfs_layout_segment *lseg; | ||||||
|  | 	struct pnfs_layout_hdr *lo; | ||||||
|  | 	struct inode *inode; | ||||||
|  | 	/* | ||||||
|  | 	 * Work function queued by pnfs_layoutreturn_free_lseg_async(): | ||||||
|  | 	 * calls pnfs_send_layoutreturn() with sync == true if | ||||||
|  | 	 * pnfs_layout_need_return() still says so, then unlinks and frees | ||||||
|  | 	 * the segment and drops the layout header reference(s). | ||||||
|  | 	 */ | ||||||
|  | 	lseg = container_of(work, struct pnfs_layout_segment, pls_work); | ||||||
|  | 	WARN_ON(atomic_read(&lseg->pls_refcount)); /* expected to be zero here */ | ||||||
|  | 	lo = lseg->pls_layout; | ||||||
|  | 	inode = lo->plh_inode; | ||||||
|  | 
 | ||||||
|  | 	spin_lock(&inode->i_lock); | ||||||
|  | 	if (pnfs_layout_need_return(lo, lseg)) { | ||||||
|  | 		nfs4_stateid stateid; | ||||||
|  | 		enum pnfs_iomode iomode; | ||||||
|  | 
 | ||||||
|  | 		stateid = lo->plh_stateid; /* snapshot taken under i_lock */ | ||||||
|  | 		iomode = lo->plh_return_iomode; | ||||||
|  | 		/* decreased in pnfs_send_layoutreturn() */ | ||||||
|  | 		lo->plh_block_lgets++; | ||||||
|  | 		lo->plh_return_iomode = 0; | ||||||
|  | 		spin_unlock(&inode->i_lock); | ||||||
|  | 
 | ||||||
|  | 		pnfs_send_layoutreturn(lo, stateid, iomode, true); | ||||||
|  | 		spin_lock(&inode->i_lock); | ||||||
|  | 	} else | ||||||
|  | 		/* match pnfs_get_layout_hdr #2 in pnfs_put_lseg; i_lock is | ||||||
|  | 		 * held here, so this relies on reference #1 keeping the | ||||||
|  | 		 * count above zero (pnfs_put_layout_hdr takes i_lock on | ||||||
|  | 		 * the final put) */ | ||||||
|  | 		pnfs_put_layout_hdr(lo); | ||||||
|  | 	pnfs_layout_remove_lseg(lo, lseg); | ||||||
|  | 	spin_unlock(&inode->i_lock); | ||||||
|  | 	pnfs_free_lseg(lseg); | ||||||
|  | 	/* match pnfs_get_layout_hdr #1 in pnfs_put_lseg */ | ||||||
|  | 	pnfs_put_layout_hdr(lo); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void | ||||||
|  | pnfs_layoutreturn_free_lseg_async(struct pnfs_layout_segment *lseg) | ||||||
|  | {	/* hand @lseg to pnfs_layoutreturn_free_lseg() via nfsiod_workqueue */ | ||||||
|  | 	INIT_WORK(&lseg->pls_work, pnfs_layoutreturn_free_lseg); | ||||||
|  | 	queue_work(nfsiod_workqueue, &lseg->pls_work); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void | void | ||||||
| pnfs_put_lseg(struct pnfs_layout_segment *lseg) | pnfs_put_lseg(struct pnfs_layout_segment *lseg) | ||||||
| { | { | ||||||
| @ -354,12 +419,19 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) | |||||||
| 	inode = lo->plh_inode; | 	inode = lo->plh_inode; | ||||||
| 	if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { | 	if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { | ||||||
| 		pnfs_get_layout_hdr(lo); | 		pnfs_get_layout_hdr(lo); | ||||||
|  | 		if (pnfs_layout_need_return(lo, lseg)) { | ||||||
|  | 			spin_unlock(&inode->i_lock); | ||||||
|  | 			/* hdr reference dropped in nfs4_layoutreturn_release */ | ||||||
|  | 			pnfs_get_layout_hdr(lo); | ||||||
|  | 			pnfs_layoutreturn_free_lseg_async(lseg); | ||||||
|  | 		} else { | ||||||
| 			pnfs_layout_remove_lseg(lo, lseg); | 			pnfs_layout_remove_lseg(lo, lseg); | ||||||
| 			spin_unlock(&inode->i_lock); | 			spin_unlock(&inode->i_lock); | ||||||
| 			pnfs_free_lseg(lseg); | 			pnfs_free_lseg(lseg); | ||||||
| 			pnfs_put_layout_hdr(lo); | 			pnfs_put_layout_hdr(lo); | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  | } | ||||||
| EXPORT_SYMBOL_GPL(pnfs_put_lseg); | EXPORT_SYMBOL_GPL(pnfs_put_lseg); | ||||||
| 
 | 
 | ||||||
| static void pnfs_free_lseg_async_work(struct work_struct *work) | static void pnfs_free_lseg_async_work(struct work_struct *work) | ||||||
| @ -544,6 +616,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) | |||||||
| 		pnfs_get_layout_hdr(lo); | 		pnfs_get_layout_hdr(lo); | ||||||
| 		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED); | 		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED); | ||||||
| 		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); | 		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); | ||||||
|  | 		pnfs_clear_retry_layoutget(lo); | ||||||
| 		spin_unlock(&nfsi->vfs_inode.i_lock); | 		spin_unlock(&nfsi->vfs_inode.i_lock); | ||||||
| 		pnfs_free_lseg_list(&tmp_list); | 		pnfs_free_lseg_list(&tmp_list); | ||||||
| 		pnfs_put_layout_hdr(lo); | 		pnfs_put_layout_hdr(lo); | ||||||
| @ -741,25 +814,37 @@ pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo, | |||||||
| 	return !pnfs_seqid_is_newer(seqid, lo->plh_barrier); | 	return !pnfs_seqid_is_newer(seqid, lo->plh_barrier); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static bool | ||||||
|  | pnfs_layout_returning(const struct pnfs_layout_hdr *lo, | ||||||
|  | 		      struct pnfs_layout_range *range) | ||||||
|  | {	/* true while NFS_LAYOUT_RETURN is set and lo->plh_return_iomode is | ||||||
|  | 	 * either IOMODE_ANY or equal to range->iomode */ | ||||||
|  | 	return test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) && | ||||||
|  | 		(lo->plh_return_iomode == IOMODE_ANY || | ||||||
|  | 		 lo->plh_return_iomode == range->iomode); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /* lget is set to 1 if called from inside send_layoutget call chain */ | /* lget is set to 1 if called from inside send_layoutget call chain */ | ||||||
| static bool | static bool | ||||||
| pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo, int lget) | pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo, | ||||||
|  | 			struct pnfs_layout_range *range, int lget) | ||||||
| { | { | ||||||
| 	return lo->plh_block_lgets || | 	return lo->plh_block_lgets || | ||||||
| 		test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || | 		test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || | ||||||
| 		(list_empty(&lo->plh_segs) && | 		(list_empty(&lo->plh_segs) && | ||||||
| 		 (atomic_read(&lo->plh_outstanding) > lget)); | 		 (atomic_read(&lo->plh_outstanding) > lget)) || | ||||||
|  | 		pnfs_layout_returning(lo, range); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| int | int | ||||||
| pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, | pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, | ||||||
|  | 			      struct pnfs_layout_range *range, | ||||||
| 			      struct nfs4_state *open_state) | 			      struct nfs4_state *open_state) | ||||||
| { | { | ||||||
| 	int status = 0; | 	int status = 0; | ||||||
| 
 | 
 | ||||||
| 	dprintk("--> %s\n", __func__); | 	dprintk("--> %s\n", __func__); | ||||||
| 	spin_lock(&lo->plh_inode->i_lock); | 	spin_lock(&lo->plh_inode->i_lock); | ||||||
| 	if (pnfs_layoutgets_blocked(lo, 1)) { | 	if (pnfs_layoutgets_blocked(lo, range, 1)) { | ||||||
| 		status = -EAGAIN; | 		status = -EAGAIN; | ||||||
| 	} else if (!nfs4_valid_open_stateid(open_state)) { | 	} else if (!nfs4_valid_open_stateid(open_state)) { | ||||||
| 		status = -EBADF; | 		status = -EBADF; | ||||||
| @ -826,7 +911,9 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||||||
| 			pnfs_layout_io_set_failed(lo, range->iomode); | 			pnfs_layout_io_set_failed(lo, range->iomode); | ||||||
| 		} | 		} | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 	} | 	} else | ||||||
|  | 		pnfs_layout_clear_fail_bit(lo, | ||||||
|  | 				pnfs_iomode_to_fail_bit(range->iomode)); | ||||||
| 
 | 
 | ||||||
| 	return lseg; | 	return lseg; | ||||||
| } | } | ||||||
| @ -846,6 +933,49 @@ static void pnfs_clear_layoutcommit(struct inode *inode, | |||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) | ||||||
|  | {	/* clear NFS_LAYOUT_RETURN in lo->plh_flags, then wake waiters on that bit */ | ||||||
|  | 	clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); | ||||||
|  | 	smp_mb__after_atomic(); /* barrier between the clear and wake_up_bit() */ | ||||||
|  | 	wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static int | ||||||
|  | pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, | ||||||
|  | 		       enum pnfs_iomode iomode, bool sync) | ||||||
|  | { | ||||||
|  | 	struct inode *ino = lo->plh_inode; | ||||||
|  | 	struct nfs4_layoutreturn *lrp; | ||||||
|  | 	int status = 0; | ||||||
|  | 	/* | ||||||
|  | 	 * Build an nfs4_layoutreturn for @lo with @stateid, @iomode and the | ||||||
|  | 	 * range offset 0 / length NFS4_MAX_UINT64, then pass it and @sync to | ||||||
|  | 	 * nfs4_proc_layoutreturn(). Returns that call's status, or -ENOMEM | ||||||
|  | 	 * if the allocation fails. | ||||||
|  | 	 * NOTE(review): callers in this file bump lo->plh_block_lgets and | ||||||
|  | 	 * hold a layout header reference before calling; on success both | ||||||
|  | 	 * are presumably released on the nfs4_proc_layoutreturn() side | ||||||
|  | 	 * -- confirm. | ||||||
|  | 	 */ | ||||||
|  | 	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); | ||||||
|  | 	if (unlikely(lrp == NULL)) { | ||||||
|  | 		status = -ENOMEM; | ||||||
|  | 		spin_lock(&ino->i_lock); | ||||||
|  | 		lo->plh_block_lgets--; /* undo the caller's plh_block_lgets++ */ | ||||||
|  | 		pnfs_clear_layoutreturn_waitbit(lo); | ||||||
|  | 		rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq); | ||||||
|  | 		spin_unlock(&ino->i_lock); | ||||||
|  | 		pnfs_put_layout_hdr(lo); /* nothing was sent: drop the header reference here */ | ||||||
|  | 		goto out; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	lrp->args.stateid = stateid; | ||||||
|  | 	lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; | ||||||
|  | 	lrp->args.inode = ino; | ||||||
|  | 	lrp->args.range.iomode = iomode; | ||||||
|  | 	lrp->args.range.offset = 0; | ||||||
|  | 	lrp->args.range.length = NFS4_MAX_UINT64; | ||||||
|  | 	lrp->args.layout = lo; | ||||||
|  | 	lrp->clp = NFS_SERVER(ino)->nfs_client; | ||||||
|  | 	lrp->cred = lo->plh_lc_cred; | ||||||
|  | 
 | ||||||
|  | 	status = nfs4_proc_layoutreturn(lrp, sync); | ||||||
|  | out: | ||||||
|  | 	dprintk("<-- %s status: %d\n", __func__, status); | ||||||
|  | 	return status; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr |  * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr | ||||||
|  * when the layout segment list is empty. |  * when the layout segment list is empty. | ||||||
| @ -860,7 +990,6 @@ _pnfs_return_layout(struct inode *ino) | |||||||
| 	struct pnfs_layout_hdr *lo = NULL; | 	struct pnfs_layout_hdr *lo = NULL; | ||||||
| 	struct nfs_inode *nfsi = NFS_I(ino); | 	struct nfs_inode *nfsi = NFS_I(ino); | ||||||
| 	LIST_HEAD(tmp_list); | 	LIST_HEAD(tmp_list); | ||||||
| 	struct nfs4_layoutreturn *lrp; |  | ||||||
| 	nfs4_stateid stateid; | 	nfs4_stateid stateid; | ||||||
| 	int status = 0, empty; | 	int status = 0, empty; | ||||||
| 
 | 
 | ||||||
| @ -902,24 +1031,7 @@ _pnfs_return_layout(struct inode *ino) | |||||||
| 	spin_unlock(&ino->i_lock); | 	spin_unlock(&ino->i_lock); | ||||||
| 	pnfs_free_lseg_list(&tmp_list); | 	pnfs_free_lseg_list(&tmp_list); | ||||||
| 
 | 
 | ||||||
| 	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); | 	status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); | ||||||
| 	if (unlikely(lrp == NULL)) { |  | ||||||
| 		status = -ENOMEM; |  | ||||||
| 		spin_lock(&ino->i_lock); |  | ||||||
| 		lo->plh_block_lgets--; |  | ||||||
| 		spin_unlock(&ino->i_lock); |  | ||||||
| 		pnfs_put_layout_hdr(lo); |  | ||||||
| 		goto out; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	lrp->args.stateid = stateid; |  | ||||||
| 	lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; |  | ||||||
| 	lrp->args.inode = ino; |  | ||||||
| 	lrp->args.layout = lo; |  | ||||||
| 	lrp->clp = NFS_SERVER(ino)->nfs_client; |  | ||||||
| 	lrp->cred = lo->plh_lc_cred; |  | ||||||
| 
 |  | ||||||
| 	status = nfs4_proc_layoutreturn(lrp); |  | ||||||
| out: | out: | ||||||
| 	dprintk("<-- %s status: %d\n", __func__, status); | 	dprintk("<-- %s status: %d\n", __func__, status); | ||||||
| 	return status; | 	return status; | ||||||
| @ -960,8 +1072,9 @@ bool pnfs_roc(struct inode *ino) | |||||||
| 	struct nfs4_state *state; | 	struct nfs4_state *state; | ||||||
| 	struct pnfs_layout_hdr *lo; | 	struct pnfs_layout_hdr *lo; | ||||||
| 	struct pnfs_layout_segment *lseg, *tmp; | 	struct pnfs_layout_segment *lseg, *tmp; | ||||||
|  | 	nfs4_stateid stateid; | ||||||
| 	LIST_HEAD(tmp_list); | 	LIST_HEAD(tmp_list); | ||||||
| 	bool found = false; | 	bool found = false, layoutreturn = false; | ||||||
| 
 | 
 | ||||||
| 	spin_lock(&ino->i_lock); | 	spin_lock(&ino->i_lock); | ||||||
| 	lo = nfsi->layout; | 	lo = nfsi->layout; | ||||||
| @ -980,6 +1093,8 @@ bool pnfs_roc(struct inode *ino) | |||||||
| 			goto out_noroc; | 			goto out_noroc; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | 		goto out_noroc; | ||||||
|  | 	pnfs_clear_retry_layoutget(lo); | ||||||
| 	list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) | 	list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) | ||||||
| 		if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { | 		if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { | ||||||
| 			mark_lseg_invalid(lseg, &tmp_list); | 			mark_lseg_invalid(lseg, &tmp_list); | ||||||
| @ -994,7 +1109,19 @@ bool pnfs_roc(struct inode *ino) | |||||||
| 	return true; | 	return true; | ||||||
| 
 | 
 | ||||||
| out_noroc: | out_noroc: | ||||||
|  | 	if (lo) { | ||||||
|  | 		stateid = lo->plh_stateid; | ||||||
|  | 		layoutreturn = | ||||||
|  | 			test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, | ||||||
|  | 					   &lo->plh_flags); | ||||||
|  | 		if (layoutreturn) { | ||||||
|  | 			lo->plh_block_lgets++; | ||||||
|  | 			pnfs_get_layout_hdr(lo); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
| 	spin_unlock(&ino->i_lock); | 	spin_unlock(&ino->i_lock); | ||||||
|  | 	if (layoutreturn) | ||||||
|  | 		pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); | ||||||
| 	return false; | 	return false; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -1029,8 +1156,9 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task) | |||||||
| 	struct nfs_inode *nfsi = NFS_I(ino); | 	struct nfs_inode *nfsi = NFS_I(ino); | ||||||
| 	struct pnfs_layout_hdr *lo; | 	struct pnfs_layout_hdr *lo; | ||||||
| 	struct pnfs_layout_segment *lseg; | 	struct pnfs_layout_segment *lseg; | ||||||
|  | 	nfs4_stateid stateid; | ||||||
| 	u32 current_seqid; | 	u32 current_seqid; | ||||||
| 	bool found = false; | 	bool found = false, layoutreturn = false; | ||||||
| 
 | 
 | ||||||
| 	spin_lock(&ino->i_lock); | 	spin_lock(&ino->i_lock); | ||||||
| 	list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list) | 	list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list) | ||||||
| @ -1047,7 +1175,21 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task) | |||||||
| 	 */ | 	 */ | ||||||
| 	*barrier = current_seqid + atomic_read(&lo->plh_outstanding); | 	*barrier = current_seqid + atomic_read(&lo->plh_outstanding); | ||||||
| out: | out: | ||||||
|  | 	if (!found) { | ||||||
|  | 		stateid = lo->plh_stateid; | ||||||
|  | 		layoutreturn = | ||||||
|  | 			test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, | ||||||
|  | 					   &lo->plh_flags); | ||||||
|  | 		if (layoutreturn) { | ||||||
|  | 			lo->plh_block_lgets++; | ||||||
|  | 			pnfs_get_layout_hdr(lo); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
| 	spin_unlock(&ino->i_lock); | 	spin_unlock(&ino->i_lock); | ||||||
|  | 	if (layoutreturn) { | ||||||
|  | 		rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); | ||||||
|  | 		pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, false); | ||||||
|  | 	} | ||||||
| 	return found; | 	return found; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -1194,6 +1336,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, | |||||||
| 
 | 
 | ||||||
| 	list_for_each_entry(lseg, &lo->plh_segs, pls_list) { | 	list_for_each_entry(lseg, &lo->plh_segs, pls_list) { | ||||||
| 		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && | 		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && | ||||||
|  | 		    !test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) && | ||||||
| 		    pnfs_lseg_range_match(&lseg->pls_range, range)) { | 		    pnfs_lseg_range_match(&lseg->pls_range, range)) { | ||||||
| 			ret = pnfs_get_lseg(lseg); | 			ret = pnfs_get_lseg(lseg); | ||||||
| 			break; | 			break; | ||||||
| @ -1282,6 +1425,35 @@ static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx, | |||||||
| 	return ret; | 	return ret; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /* stop waiting if someone clears NFS_LAYOUT_RETRY_LAYOUTGET bit. | ||||||
|  |  * Used as the action callback of wait_on_bit_action() in | ||||||
|  |  * pnfs_prepare_to_retry_layoutget(). Returns 1 when the bit is clear, | ||||||
|  |  * otherwise whatever nfs_wait_bit_killable() returns. | ||||||
|  |  */ | ||||||
|  | static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key) | ||||||
|  | { | ||||||
|  | 	if (!test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, key->flags)) | ||||||
|  | 		return 1; | ||||||
|  | 	return nfs_wait_bit_killable(key); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo) | ||||||
|  | { | ||||||
|  | 	/*
 | ||||||
|  | 	 * Wait on the NFS_LAYOUT_RETURN bit of lo->plh_flags before the | ||||||
|  | 	 * caller retries LAYOUTGET; returns true only if | ||||||
|  | 	 * wait_on_bit_action() returned 0. | ||||||
|  | 	 * | ||||||
|  | 	 * send layoutcommit as it can hold up layoutreturn due to lseg | ||||||
|  | 	 * reference | ||||||
|  | 	 */ | ||||||
|  | 	pnfs_layoutcommit_inode(lo->plh_inode, false); | ||||||
|  | 	return !wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN, | ||||||
|  | 				   pnfs_layoutget_retry_bit_wait, | ||||||
|  | 				   TASK_UNINTERRUPTIBLE); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo) | ||||||
|  | { | ||||||
|  | 	unsigned long *bitlock = &lo->plh_flags; | ||||||
|  | 	/* | ||||||
|  | 	 * Release NFS_LAYOUT_FIRST_LAYOUTGET and wake tasks blocked in | ||||||
|  | 	 * wait_on_bit() on it (see pnfs_update_layout()). | ||||||
|  | 	 */ | ||||||
|  | 	clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, bitlock); | ||||||
|  | 	smp_mb__after_atomic(); /* barrier between the clear and wake_up_bit() */ | ||||||
|  | 	wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Layout segment is retrieved from the server if not cached. |  * Layout segment is retrieved from the server if not cached. | ||||||
|  * The appropriate layout segment is referenced and returned to the caller. |  * The appropriate layout segment is referenced and returned to the caller. | ||||||
| @ -1312,6 +1484,8 @@ pnfs_update_layout(struct inode *ino, | |||||||
| 	if (pnfs_within_mdsthreshold(ctx, ino, iomode)) | 	if (pnfs_within_mdsthreshold(ctx, ino, iomode)) | ||||||
| 		goto out; | 		goto out; | ||||||
| 
 | 
 | ||||||
|  | lookup_again: | ||||||
|  | 	first = false; | ||||||
| 	spin_lock(&ino->i_lock); | 	spin_lock(&ino->i_lock); | ||||||
| 	lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); | 	lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); | ||||||
| 	if (lo == NULL) { | 	if (lo == NULL) { | ||||||
| @ -1326,26 +1500,61 @@ pnfs_update_layout(struct inode *ino, | |||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	/* if LAYOUTGET already failed once we don't try again */ | 	/* if LAYOUTGET already failed once we don't try again */ | ||||||
| 	if (pnfs_layout_io_test_failed(lo, iomode)) | 	if (pnfs_layout_io_test_failed(lo, iomode) && | ||||||
|  | 	    !pnfs_should_retry_layoutget(lo)) | ||||||
| 		goto out_unlock; | 		goto out_unlock; | ||||||
| 
 | 
 | ||||||
| 	/* Check to see if the layout for the given range already exists */ | 	first = list_empty(&lo->plh_segs); | ||||||
|  | 	if (first) { | ||||||
|  | 		/* The first layoutget for the file. Need to serialize per
 | ||||||
|  | 		 * RFC 5661 Errata 3208. | ||||||
|  | 		 */ | ||||||
|  | 		if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET, | ||||||
|  | 				     &lo->plh_flags)) { | ||||||
|  | 			spin_unlock(&ino->i_lock); | ||||||
|  | 			wait_on_bit(&lo->plh_flags, NFS_LAYOUT_FIRST_LAYOUTGET, | ||||||
|  | 				    TASK_UNINTERRUPTIBLE); | ||||||
|  | 			pnfs_put_layout_hdr(lo); | ||||||
|  | 			goto lookup_again; | ||||||
|  | 		} | ||||||
|  | 	} else { | ||||||
|  | 		/* Check to see if the layout for the given range
 | ||||||
|  | 		 * already exists | ||||||
|  | 		 */ | ||||||
| 		lseg = pnfs_find_lseg(lo, &arg); | 		lseg = pnfs_find_lseg(lo, &arg); | ||||||
| 		if (lseg) | 		if (lseg) | ||||||
| 			goto out_unlock; | 			goto out_unlock; | ||||||
|  | 	} | ||||||
| 
 | 
 | ||||||
| 	if (pnfs_layoutgets_blocked(lo, 0)) | 	/*
 | ||||||
|  | 	 * Because we free lsegs before sending LAYOUTRETURN, we need to wait | ||||||
|  | 	 * for LAYOUTRETURN even if first is true. | ||||||
|  | 	 */ | ||||||
|  | 	if (!lseg && pnfs_should_retry_layoutget(lo) && | ||||||
|  | 	    test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { | ||||||
|  | 		spin_unlock(&ino->i_lock); | ||||||
|  | 		dprintk("%s wait for layoutreturn\n", __func__); | ||||||
|  | 		if (pnfs_prepare_to_retry_layoutget(lo)) { | ||||||
|  | 			if (first) | ||||||
|  | 				pnfs_clear_first_layoutget(lo); | ||||||
|  | 			pnfs_put_layout_hdr(lo); | ||||||
|  | 			dprintk("%s retrying\n", __func__); | ||||||
|  | 			goto lookup_again; | ||||||
|  | 		} | ||||||
|  | 		goto out_put_layout_hdr; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if (pnfs_layoutgets_blocked(lo, &arg, 0)) | ||||||
| 		goto out_unlock; | 		goto out_unlock; | ||||||
| 	atomic_inc(&lo->plh_outstanding); | 	atomic_inc(&lo->plh_outstanding); | ||||||
| 
 |  | ||||||
| 	first = list_empty(&lo->plh_layouts) ? true : false; |  | ||||||
| 	spin_unlock(&ino->i_lock); | 	spin_unlock(&ino->i_lock); | ||||||
| 
 | 
 | ||||||
| 	if (first) { | 	if (list_empty(&lo->plh_layouts)) { | ||||||
| 		/* The lo must be on the clp list if there is any
 | 		/* The lo must be on the clp list if there is any
 | ||||||
| 		 * chance of a CB_LAYOUTRECALL(FILE) coming in. | 		 * chance of a CB_LAYOUTRECALL(FILE) coming in. | ||||||
| 		 */ | 		 */ | ||||||
| 		spin_lock(&clp->cl_lock); | 		spin_lock(&clp->cl_lock); | ||||||
|  | 		if (list_empty(&lo->plh_layouts)) | ||||||
| 			list_add_tail(&lo->plh_layouts, &server->layouts); | 			list_add_tail(&lo->plh_layouts, &server->layouts); | ||||||
| 		spin_unlock(&clp->cl_lock); | 		spin_unlock(&clp->cl_lock); | ||||||
| 	} | 	} | ||||||
| @ -1359,8 +1568,11 @@ pnfs_update_layout(struct inode *ino, | |||||||
| 		arg.length = PAGE_CACHE_ALIGN(arg.length); | 		arg.length = PAGE_CACHE_ALIGN(arg.length); | ||||||
| 
 | 
 | ||||||
| 	lseg = send_layoutget(lo, ctx, &arg, gfp_flags); | 	lseg = send_layoutget(lo, ctx, &arg, gfp_flags); | ||||||
|  | 	pnfs_clear_retry_layoutget(lo); | ||||||
| 	atomic_dec(&lo->plh_outstanding); | 	atomic_dec(&lo->plh_outstanding); | ||||||
| out_put_layout_hdr: | out_put_layout_hdr: | ||||||
|  | 	if (first) | ||||||
|  | 		pnfs_clear_first_layoutget(lo); | ||||||
| 	pnfs_put_layout_hdr(lo); | 	pnfs_put_layout_hdr(lo); | ||||||
| out: | out: | ||||||
| 	dprintk("%s: inode %s/%llu pNFS layout segment %s for " | 	dprintk("%s: inode %s/%llu pNFS layout segment %s for " | ||||||
| @ -1409,7 +1621,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) | |||||||
| 		goto out_forget_reply; | 		goto out_forget_reply; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (pnfs_layoutgets_blocked(lo, 1)) { | 	if (pnfs_layoutgets_blocked(lo, &lgp->args.range, 1)) { | ||||||
| 		dprintk("%s forget reply due to state\n", __func__); | 		dprintk("%s forget reply due to state\n", __func__); | ||||||
| 		goto out_forget_reply; | 		goto out_forget_reply; | ||||||
| 	} | 	} | ||||||
| @ -1456,13 +1668,67 @@ out_forget_reply: | |||||||
| 	goto out; | 	goto out; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static void | ||||||
|  | pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, | ||||||
|  | 				struct list_head *tmp_list, | ||||||
|  | 				struct pnfs_layout_range *return_range) | ||||||
|  | { | ||||||
|  | 	struct pnfs_layout_segment *lseg, *next; | ||||||
|  | 	/* | ||||||
|  | 	 * For every segment on lo->plh_segs that should_free_lseg() matches | ||||||
|  | 	 * against @return_range: set NFS_LSEG_LAYOUTRETURN on it, then pass | ||||||
|  | 	 * it to mark_lseg_invalid() together with @tmp_list. | ||||||
|  | 	 */ | ||||||
|  | 	dprintk("%s:Begin lo %p\n", __func__, lo); | ||||||
|  | 
 | ||||||
|  | 	if (list_empty(&lo->plh_segs)) | ||||||
|  | 		return; | ||||||
|  | 
 | ||||||
|  | 	/* NOTE(review): _safe iteration, presumably because | ||||||
|  | 	 * mark_lseg_invalid() may unlink lseg -- confirm */ | ||||||
|  | 	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) | ||||||
|  | 		if (should_free_lseg(&lseg->pls_range, return_range)) { | ||||||
|  | 			dprintk("%s: marking lseg %p iomode %d " | ||||||
|  | 				"offset %llu length %llu\n", __func__, | ||||||
|  | 				lseg, lseg->pls_range.iomode, | ||||||
|  | 				lseg->pls_range.offset, | ||||||
|  | 				lseg->pls_range.length); | ||||||
|  | 			set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); | ||||||
|  | 			mark_lseg_invalid(lseg, tmp_list); | ||||||
|  | 		} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void pnfs_error_mark_layout_for_return(struct inode *inode, | ||||||
|  | 				       struct pnfs_layout_segment *lseg) | ||||||
|  | { | ||||||
|  | 	struct pnfs_layout_hdr *lo = NFS_I(inode)->layout; | ||||||
|  | 	int iomode = pnfs_iomode_to_fail_bit(lseg->pls_range.iomode); /* holds the fail-bit value, not an iomode */ | ||||||
|  | 	struct pnfs_layout_range range = { | ||||||
|  | 		.iomode = lseg->pls_range.iomode, | ||||||
|  | 		.offset = 0, | ||||||
|  | 		.length = NFS4_MAX_UINT64, | ||||||
|  | 	}; | ||||||
|  | 	LIST_HEAD(free_me); | ||||||
|  | 	/* | ||||||
|  | 	 * Mark the inode's layout for return, based on @lseg's iomode: set | ||||||
|  | 	 * the matching fail bit and NFS_LAYOUT_RETURN, fold the iomode into | ||||||
|  | 	 * lo->plh_return_iomode, flag all segments matching the range | ||||||
|  | 	 * (offset 0, length NFS4_MAX_UINT64) for return, and free the ones | ||||||
|  | 	 * collected on free_me after i_lock is dropped. | ||||||
|  | 	 */ | ||||||
|  | 	spin_lock(&inode->i_lock); | ||||||
|  | 	/* set failure bit so that pnfs path will be retried later */ | ||||||
|  | 	pnfs_layout_set_fail_bit(lo, iomode); | ||||||
|  | 	set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); | ||||||
|  | 	if (lo->plh_return_iomode == 0) /* no return iomode recorded yet */ | ||||||
|  | 		lo->plh_return_iomode = range.iomode; | ||||||
|  | 	else if (lo->plh_return_iomode != range.iomode) /* differs from the recorded one: widen */ | ||||||
|  | 		lo->plh_return_iomode = IOMODE_ANY; | ||||||
|  | 	/*
 | ||||||
|  | 	 * mark all matching lsegs so that we are sure to have no live | ||||||
|  | 	 * segments at hand when sending layoutreturn. See pnfs_put_lseg() | ||||||
|  | 	 * for how it works. | ||||||
|  | 	 */ | ||||||
|  | 	pnfs_mark_matching_lsegs_return(lo, &free_me, &range); | ||||||
|  | 	spin_unlock(&inode->i_lock); | ||||||
|  | 	pnfs_free_lseg_list(&free_me); | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return); | ||||||
|  | 
 | ||||||
| void | void | ||||||
| pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | ||||||
| { | { | ||||||
| 	u64 rd_size = req->wb_bytes; | 	u64 rd_size = req->wb_bytes; | ||||||
| 
 | 
 | ||||||
| 	WARN_ON_ONCE(pgio->pg_lseg != NULL); | 	if (pgio->pg_lseg == NULL) { | ||||||
| 
 |  | ||||||
| 		if (pgio->pg_dreq == NULL) | 		if (pgio->pg_dreq == NULL) | ||||||
| 			rd_size = i_size_read(pgio->pg_inode) - req_offset(req); | 			rd_size = i_size_read(pgio->pg_inode) - req_offset(req); | ||||||
| 		else | 		else | ||||||
| @ -1474,6 +1740,7 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r | |||||||
| 						   rd_size, | 						   rd_size, | ||||||
| 						   IOMODE_READ, | 						   IOMODE_READ, | ||||||
| 						   GFP_KERNEL); | 						   GFP_KERNEL); | ||||||
|  | 	} | ||||||
| 	/* If no lseg, fall back to read through mds */ | 	/* If no lseg, fall back to read through mds */ | ||||||
| 	if (pgio->pg_lseg == NULL) | 	if (pgio->pg_lseg == NULL) | ||||||
| 		nfs_pageio_reset_read_mds(pgio); | 		nfs_pageio_reset_read_mds(pgio); | ||||||
| @ -1485,8 +1752,7 @@ void | |||||||
| pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, | pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, | ||||||
| 			   struct nfs_page *req, u64 wb_size) | 			   struct nfs_page *req, u64 wb_size) | ||||||
| { | { | ||||||
| 	WARN_ON_ONCE(pgio->pg_lseg != NULL); | 	if (pgio->pg_lseg == NULL) | ||||||
| 
 |  | ||||||
| 		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | 		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | ||||||
| 						   req->wb_context, | 						   req->wb_context, | ||||||
| 						   req_offset(req), | 						   req_offset(req), | ||||||
| @ -1499,13 +1765,23 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, | |||||||
| } | } | ||||||
| EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); | EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); | ||||||
| 
 | 
 | ||||||
|  | void | ||||||
|  | pnfs_generic_pg_cleanup(struct nfs_pageio_descriptor *desc) | ||||||
|  | {	/* put the lseg held in desc->pg_lseg, if any, and reset the pointer */ | ||||||
|  | 	if (desc->pg_lseg) { | ||||||
|  | 		pnfs_put_lseg(desc->pg_lseg); | ||||||
|  | 		desc->pg_lseg = NULL; /* later pg_lseg == NULL checks will see no lseg */ | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup); | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number |  * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number | ||||||
|  * of bytes (maximum @req->wb_bytes) that can be coalesced. |  * of bytes (maximum @req->wb_bytes) that can be coalesced. | ||||||
|  */ |  */ | ||||||
| size_t | size_t | ||||||
| pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, | ||||||
| 		     struct nfs_page *req) | 		     struct nfs_page *prev, struct nfs_page *req) | ||||||
| { | { | ||||||
| 	unsigned int size; | 	unsigned int size; | ||||||
| 	u64 seg_end, req_start, seg_left; | 	u64 seg_end, req_start, seg_left; | ||||||
| @ -1529,10 +1805,16 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | |||||||
| 		seg_end = end_offset(pgio->pg_lseg->pls_range.offset, | 		seg_end = end_offset(pgio->pg_lseg->pls_range.offset, | ||||||
| 				     pgio->pg_lseg->pls_range.length); | 				     pgio->pg_lseg->pls_range.length); | ||||||
| 		req_start = req_offset(req); | 		req_start = req_offset(req); | ||||||
| 		WARN_ON_ONCE(req_start > seg_end); | 		WARN_ON_ONCE(req_start >= seg_end); | ||||||
| 		/* start of request is past the last byte of this segment */ | 		/* start of request is past the last byte of this segment */ | ||||||
| 		if (req_start >= seg_end) | 		if (req_start >= seg_end) { | ||||||
|  | 			/* reference the new lseg */ | ||||||
|  | 			if (pgio->pg_ops->pg_cleanup) | ||||||
|  | 				pgio->pg_ops->pg_cleanup(pgio); | ||||||
|  | 			if (pgio->pg_ops->pg_init) | ||||||
|  | 				pgio->pg_ops->pg_init(pgio, req); | ||||||
| 			return 0; | 			return 0; | ||||||
|  | 		} | ||||||
| 
 | 
 | ||||||
| 		/* adjust 'size' iff there are fewer bytes left in the
 | 		/* adjust 'size' iff there are fewer bytes left in the
 | ||||||
| 		 * segment than what nfs_generic_pg_test returned */ | 		 * segment than what nfs_generic_pg_test returned */ | ||||||
| @ -1587,10 +1869,12 @@ static void | |||||||
| pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, | pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, | ||||||
| 		struct nfs_pgio_header *hdr) | 		struct nfs_pgio_header *hdr) | ||||||
| { | { | ||||||
|  | 	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); | ||||||
|  | 
 | ||||||
| 	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { | 	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { | ||||||
| 		list_splice_tail_init(&hdr->pages, &desc->pg_list); | 		list_splice_tail_init(&hdr->pages, &mirror->pg_list); | ||||||
| 		nfs_pageio_reset_write_mds(desc); | 		nfs_pageio_reset_write_mds(desc); | ||||||
| 		desc->pg_recoalesce = 1; | 		mirror->pg_recoalesce = 1; | ||||||
| 	} | 	} | ||||||
| 	nfs_pgio_data_destroy(hdr); | 	nfs_pgio_data_destroy(hdr); | ||||||
| } | } | ||||||
| @ -1624,11 +1908,9 @@ pnfs_do_write(struct nfs_pageio_descriptor *desc, | |||||||
| 	struct pnfs_layout_segment *lseg = desc->pg_lseg; | 	struct pnfs_layout_segment *lseg = desc->pg_lseg; | ||||||
| 	enum pnfs_try_status trypnfs; | 	enum pnfs_try_status trypnfs; | ||||||
| 
 | 
 | ||||||
| 	desc->pg_lseg = NULL; |  | ||||||
| 	trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how); | 	trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how); | ||||||
| 	if (trypnfs == PNFS_NOT_ATTEMPTED) | 	if (trypnfs == PNFS_NOT_ATTEMPTED) | ||||||
| 		pnfs_write_through_mds(desc, hdr); | 		pnfs_write_through_mds(desc, hdr); | ||||||
| 	pnfs_put_lseg(lseg); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) | static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) | ||||||
| @ -1641,24 +1923,23 @@ EXPORT_SYMBOL_GPL(pnfs_writehdr_free); | |||||||
| int | int | ||||||
| pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) | pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) | ||||||
| { | { | ||||||
|  | 	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); | ||||||
|  | 
 | ||||||
| 	struct nfs_pgio_header *hdr; | 	struct nfs_pgio_header *hdr; | ||||||
| 	int ret; | 	int ret; | ||||||
| 
 | 
 | ||||||
| 	hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); | 	hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); | ||||||
| 	if (!hdr) { | 	if (!hdr) { | ||||||
| 		desc->pg_completion_ops->error_cleanup(&desc->pg_list); | 		desc->pg_completion_ops->error_cleanup(&mirror->pg_list); | ||||||
| 		pnfs_put_lseg(desc->pg_lseg); |  | ||||||
| 		desc->pg_lseg = NULL; |  | ||||||
| 		return -ENOMEM; | 		return -ENOMEM; | ||||||
| 	} | 	} | ||||||
| 	nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); | 	nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); | ||||||
|  | 
 | ||||||
| 	hdr->lseg = pnfs_get_lseg(desc->pg_lseg); | 	hdr->lseg = pnfs_get_lseg(desc->pg_lseg); | ||||||
| 	ret = nfs_generic_pgio(desc, hdr); | 	ret = nfs_generic_pgio(desc, hdr); | ||||||
| 	if (ret != 0) { | 	if (!ret) | ||||||
| 		pnfs_put_lseg(desc->pg_lseg); |  | ||||||
| 		desc->pg_lseg = NULL; |  | ||||||
| 	} else |  | ||||||
| 		pnfs_do_write(desc, hdr, desc->pg_ioflags); | 		pnfs_do_write(desc, hdr, desc->pg_ioflags); | ||||||
|  | 
 | ||||||
| 	return ret; | 	return ret; | ||||||
| } | } | ||||||
| EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); | EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); | ||||||
| @ -1703,10 +1984,12 @@ static void | |||||||
| pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, | pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, | ||||||
| 		struct nfs_pgio_header *hdr) | 		struct nfs_pgio_header *hdr) | ||||||
| { | { | ||||||
|  | 	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); | ||||||
|  | 
 | ||||||
| 	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { | 	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { | ||||||
| 		list_splice_tail_init(&hdr->pages, &desc->pg_list); | 		list_splice_tail_init(&hdr->pages, &mirror->pg_list); | ||||||
| 		nfs_pageio_reset_read_mds(desc); | 		nfs_pageio_reset_read_mds(desc); | ||||||
| 		desc->pg_recoalesce = 1; | 		mirror->pg_recoalesce = 1; | ||||||
| 	} | 	} | ||||||
| 	nfs_pgio_data_destroy(hdr); | 	nfs_pgio_data_destroy(hdr); | ||||||
| } | } | ||||||
| @ -1735,18 +2018,29 @@ pnfs_try_to_read_data(struct nfs_pgio_header *hdr, | |||||||
| 	return trypnfs; | 	return trypnfs; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /* Resend all requests through pnfs. */ | ||||||
|  | int pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr) | ||||||
|  | { | ||||||
|  | 	struct nfs_pageio_descriptor pgio; | ||||||
|  | 
 | ||||||
|  | 	nfs_pageio_init_read(&pgio, hdr->inode, false, hdr->completion_ops); | ||||||
|  | 	return nfs_pageio_resend(&pgio, hdr); | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL_GPL(pnfs_read_resend_pnfs); | ||||||
|  | 
 | ||||||
| static void | static void | ||||||
| pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) | pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) | ||||||
| { | { | ||||||
| 	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; | 	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; | ||||||
| 	struct pnfs_layout_segment *lseg = desc->pg_lseg; | 	struct pnfs_layout_segment *lseg = desc->pg_lseg; | ||||||
| 	enum pnfs_try_status trypnfs; | 	enum pnfs_try_status trypnfs; | ||||||
|  | 	int err = 0; | ||||||
| 
 | 
 | ||||||
| 	desc->pg_lseg = NULL; |  | ||||||
| 	trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg); | 	trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg); | ||||||
| 	if (trypnfs == PNFS_NOT_ATTEMPTED) | 	if (trypnfs == PNFS_TRY_AGAIN) | ||||||
|  | 		err = pnfs_read_resend_pnfs(hdr); | ||||||
|  | 	if (trypnfs == PNFS_NOT_ATTEMPTED || err) | ||||||
| 		pnfs_read_through_mds(desc, hdr); | 		pnfs_read_through_mds(desc, hdr); | ||||||
| 	pnfs_put_lseg(lseg); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) | static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) | ||||||
| @ -1759,24 +2053,20 @@ EXPORT_SYMBOL_GPL(pnfs_readhdr_free); | |||||||
| int | int | ||||||
| pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) | pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) | ||||||
| { | { | ||||||
|  | 	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); | ||||||
|  | 
 | ||||||
| 	struct nfs_pgio_header *hdr; | 	struct nfs_pgio_header *hdr; | ||||||
| 	int ret; | 	int ret; | ||||||
| 
 | 
 | ||||||
| 	hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); | 	hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); | ||||||
| 	if (!hdr) { | 	if (!hdr) { | ||||||
| 		desc->pg_completion_ops->error_cleanup(&desc->pg_list); | 		desc->pg_completion_ops->error_cleanup(&mirror->pg_list); | ||||||
| 		ret = -ENOMEM; | 		return -ENOMEM; | ||||||
| 		pnfs_put_lseg(desc->pg_lseg); |  | ||||||
| 		desc->pg_lseg = NULL; |  | ||||||
| 		return ret; |  | ||||||
| 	} | 	} | ||||||
| 	nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); | 	nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); | ||||||
| 	hdr->lseg = pnfs_get_lseg(desc->pg_lseg); | 	hdr->lseg = pnfs_get_lseg(desc->pg_lseg); | ||||||
| 	ret = nfs_generic_pgio(desc, hdr); | 	ret = nfs_generic_pgio(desc, hdr); | ||||||
| 	if (ret != 0) { | 	if (!ret) | ||||||
| 		pnfs_put_lseg(desc->pg_lseg); |  | ||||||
| 		desc->pg_lseg = NULL; |  | ||||||
| 	} else |  | ||||||
| 		pnfs_do_read(desc, hdr); | 		pnfs_do_read(desc, hdr); | ||||||
| 	return ret; | 	return ret; | ||||||
| } | } | ||||||
| @ -1982,6 +2272,7 @@ clear_layoutcommitting: | |||||||
| 	pnfs_clear_layoutcommitting(inode); | 	pnfs_clear_layoutcommitting(inode); | ||||||
| 	goto out; | 	goto out; | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode); | ||||||
| 
 | 
 | ||||||
| struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) | struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) | ||||||
| { | { | ||||||
|  | |||||||
							
								
								
									
										125
									
								
								fs/nfs/pnfs.h
									
									
									
									
									
								
							
							
						
						
									
										125
									
								
								fs/nfs/pnfs.h
									
									
									
									
									
								
							| @ -38,6 +38,25 @@ enum { | |||||||
| 	NFS_LSEG_VALID = 0,	/* cleared when lseg is recalled/returned */ | 	NFS_LSEG_VALID = 0,	/* cleared when lseg is recalled/returned */ | ||||||
| 	NFS_LSEG_ROC,		/* roc bit received from server */ | 	NFS_LSEG_ROC,		/* roc bit received from server */ | ||||||
| 	NFS_LSEG_LAYOUTCOMMIT,	/* layoutcommit bit set for layoutcommit */ | 	NFS_LSEG_LAYOUTCOMMIT,	/* layoutcommit bit set for layoutcommit */ | ||||||
|  | 	NFS_LSEG_LAYOUTRETURN,	/* layoutreturn bit set for layoutreturn */ | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | /* Individual ip address */ | ||||||
|  | struct nfs4_pnfs_ds_addr { | ||||||
|  | 	struct sockaddr_storage	da_addr; | ||||||
|  | 	size_t			da_addrlen; | ||||||
|  | 	struct list_head	da_node;  /* nfs4_pnfs_dev_hlist dev_dslist */ | ||||||
|  | 	char			*da_remotestr;	/* human readable addr+port */ | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | struct nfs4_pnfs_ds { | ||||||
|  | 	struct list_head	ds_node;  /* nfs4_pnfs_dev_hlist dev_dslist */ | ||||||
|  | 	char			*ds_remotestr;	/* comma sep list of addrs */ | ||||||
|  | 	struct list_head	ds_addrs; | ||||||
|  | 	struct nfs_client	*ds_clp; | ||||||
|  | 	atomic_t		ds_count; | ||||||
|  | 	unsigned long		ds_state; | ||||||
|  | #define NFS4DS_CONNECTING	0	/* ds is establishing connection */ | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct pnfs_layout_segment { | struct pnfs_layout_segment { | ||||||
| @ -53,19 +72,34 @@ struct pnfs_layout_segment { | |||||||
| enum pnfs_try_status { | enum pnfs_try_status { | ||||||
| 	PNFS_ATTEMPTED     = 0, | 	PNFS_ATTEMPTED     = 0, | ||||||
| 	PNFS_NOT_ATTEMPTED = 1, | 	PNFS_NOT_ATTEMPTED = 1, | ||||||
|  | 	PNFS_TRY_AGAIN     = 2, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| #ifdef CONFIG_NFS_V4_1 | #ifdef CONFIG_NFS_V4_1 | ||||||
| 
 | 
 | ||||||
| #define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4" | #define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4" | ||||||
| 
 | 
 | ||||||
|  | /*
 | ||||||
|  |  * Default data server connection timeout and retrans values. | ||||||
|  |  * Set by module parameters dataserver_timeo and dataserver_retrans. | ||||||
|  |  */ | ||||||
|  | #define NFS4_DEF_DS_TIMEO   600 /* in tenths of a second */ | ||||||
|  | #define NFS4_DEF_DS_RETRANS 5 | ||||||
|  | 
 | ||||||
|  | /* error codes for internal use */ | ||||||
|  | #define NFS4ERR_RESET_TO_MDS   12001 | ||||||
|  | #define NFS4ERR_RESET_TO_PNFS  12002 | ||||||
|  | 
 | ||||||
| enum { | enum { | ||||||
| 	NFS_LAYOUT_RO_FAILED = 0,	/* get ro layout failed stop trying */ | 	NFS_LAYOUT_RO_FAILED = 0,	/* get ro layout failed stop trying */ | ||||||
| 	NFS_LAYOUT_RW_FAILED,		/* get rw layout failed stop trying */ | 	NFS_LAYOUT_RW_FAILED,		/* get rw layout failed stop trying */ | ||||||
| 	NFS_LAYOUT_BULK_RECALL,		/* bulk recall affecting layout */ | 	NFS_LAYOUT_BULK_RECALL,		/* bulk recall affecting layout */ | ||||||
| 	NFS_LAYOUT_ROC,			/* some lseg had roc bit set */ | 	NFS_LAYOUT_ROC,			/* some lseg had roc bit set */ | ||||||
| 	NFS_LAYOUT_RETURN,		/* Return this layout ASAP */ | 	NFS_LAYOUT_RETURN,		/* Return this layout ASAP */ | ||||||
|  | 	NFS_LAYOUT_RETURN_BEFORE_CLOSE,	/* Return this layout before close */ | ||||||
| 	NFS_LAYOUT_INVALID_STID,	/* layout stateid id is invalid */ | 	NFS_LAYOUT_INVALID_STID,	/* layout stateid id is invalid */ | ||||||
|  | 	NFS_LAYOUT_FIRST_LAYOUTGET,	/* Serialize first layoutget */ | ||||||
|  | 	NFS_LAYOUT_RETRY_LAYOUTGET,	/* Retry layoutget */ | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| enum layoutdriver_policy_flags { | enum layoutdriver_policy_flags { | ||||||
| @ -106,7 +140,8 @@ struct pnfs_layoutdriver_type { | |||||||
| 	struct pnfs_ds_commit_info *(*get_ds_info) (struct inode *inode); | 	struct pnfs_ds_commit_info *(*get_ds_info) (struct inode *inode); | ||||||
| 	void (*mark_request_commit) (struct nfs_page *req, | 	void (*mark_request_commit) (struct nfs_page *req, | ||||||
| 				     struct pnfs_layout_segment *lseg, | 				     struct pnfs_layout_segment *lseg, | ||||||
| 				     struct nfs_commit_info *cinfo); | 				     struct nfs_commit_info *cinfo, | ||||||
|  | 				     u32 ds_commit_idx); | ||||||
| 	void (*clear_request_commit) (struct nfs_page *req, | 	void (*clear_request_commit) (struct nfs_page *req, | ||||||
| 				      struct nfs_commit_info *cinfo); | 				      struct nfs_commit_info *cinfo); | ||||||
| 	int (*scan_commit_lists) (struct nfs_commit_info *cinfo, | 	int (*scan_commit_lists) (struct nfs_commit_info *cinfo, | ||||||
| @ -154,6 +189,7 @@ struct pnfs_layout_hdr { | |||||||
| 	u32			plh_barrier; /* ignore lower seqids */ | 	u32			plh_barrier; /* ignore lower seqids */ | ||||||
| 	unsigned long		plh_retry_timestamp; | 	unsigned long		plh_retry_timestamp; | ||||||
| 	unsigned long		plh_flags; | 	unsigned long		plh_flags; | ||||||
|  | 	enum pnfs_iomode	plh_return_iomode; | ||||||
| 	loff_t			plh_lwb; /* last write byte for layoutcommit */ | 	loff_t			plh_lwb; /* last write byte for layoutcommit */ | ||||||
| 	struct rpc_cred		*plh_lc_cred; /* layoutcommit cred */ | 	struct rpc_cred		*plh_lc_cred; /* layoutcommit cred */ | ||||||
| 	struct inode		*plh_inode; | 	struct inode		*plh_inode; | ||||||
| @ -185,7 +221,7 @@ extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, | |||||||
| 				   struct pnfs_device *dev, | 				   struct pnfs_device *dev, | ||||||
| 				   struct rpc_cred *cred); | 				   struct rpc_cred *cred); | ||||||
| extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); | extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); | ||||||
| extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); | extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync); | ||||||
| 
 | 
 | ||||||
| /* pnfs.c */ | /* pnfs.c */ | ||||||
| void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); | void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); | ||||||
| @ -198,6 +234,7 @@ void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page * | |||||||
| int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); | int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); | ||||||
| void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, | void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, | ||||||
| 			        struct nfs_page *req, u64 wb_size); | 			        struct nfs_page *req, u64 wb_size); | ||||||
|  | void pnfs_generic_pg_cleanup(struct nfs_pageio_descriptor *); | ||||||
| int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); | int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); | ||||||
| size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, | size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, | ||||||
| 			    struct nfs_page *prev, struct nfs_page *req); | 			    struct nfs_page *prev, struct nfs_page *req); | ||||||
| @ -217,6 +254,7 @@ void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, | |||||||
| 			     bool update_barrier); | 			     bool update_barrier); | ||||||
| int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, | int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, | ||||||
| 				  struct pnfs_layout_hdr *lo, | 				  struct pnfs_layout_hdr *lo, | ||||||
|  | 				  struct pnfs_layout_range *range, | ||||||
| 				  struct nfs4_state *open_state); | 				  struct nfs4_state *open_state); | ||||||
| int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, | int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, | ||||||
| 				struct list_head *tmp_list, | 				struct list_head *tmp_list, | ||||||
| @ -233,17 +271,21 @@ int _pnfs_return_layout(struct inode *); | |||||||
| int pnfs_commit_and_return_layout(struct inode *); | int pnfs_commit_and_return_layout(struct inode *); | ||||||
| void pnfs_ld_write_done(struct nfs_pgio_header *); | void pnfs_ld_write_done(struct nfs_pgio_header *); | ||||||
| void pnfs_ld_read_done(struct nfs_pgio_header *); | void pnfs_ld_read_done(struct nfs_pgio_header *); | ||||||
|  | int pnfs_read_resend_pnfs(struct nfs_pgio_header *); | ||||||
| struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, | struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, | ||||||
| 					       struct nfs_open_context *ctx, | 					       struct nfs_open_context *ctx, | ||||||
| 					       loff_t pos, | 					       loff_t pos, | ||||||
| 					       u64 count, | 					       u64 count, | ||||||
| 					       enum pnfs_iomode iomode, | 					       enum pnfs_iomode iomode, | ||||||
| 					       gfp_t gfp_flags); | 					       gfp_t gfp_flags); | ||||||
|  | void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo); | ||||||
| 
 | 
 | ||||||
| void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); | void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); | ||||||
| int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *); | int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *); | ||||||
| int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *); | int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *); | ||||||
| struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); | struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); | ||||||
|  | void pnfs_error_mark_layout_for_return(struct inode *inode, | ||||||
|  | 				       struct pnfs_layout_segment *lseg); | ||||||
| 
 | 
 | ||||||
| /* nfs4_deviceid_flags */ | /* nfs4_deviceid_flags */ | ||||||
| enum { | enum { | ||||||
| @ -275,6 +317,34 @@ void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node); | |||||||
| bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node); | bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node); | ||||||
| void nfs4_deviceid_purge_client(const struct nfs_client *); | void nfs4_deviceid_purge_client(const struct nfs_client *); | ||||||
| 
 | 
 | ||||||
|  | /* pnfs_nfs.c */ | ||||||
|  | void pnfs_generic_clear_request_commit(struct nfs_page *req, | ||||||
|  | 				       struct nfs_commit_info *cinfo); | ||||||
|  | void pnfs_generic_commit_release(void *calldata); | ||||||
|  | void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data); | ||||||
|  | void pnfs_generic_rw_release(void *data); | ||||||
|  | void pnfs_generic_recover_commit_reqs(struct list_head *dst, | ||||||
|  | 				      struct nfs_commit_info *cinfo); | ||||||
|  | int pnfs_generic_commit_pagelist(struct inode *inode, | ||||||
|  | 				 struct list_head *mds_pages, | ||||||
|  | 				 int how, | ||||||
|  | 				 struct nfs_commit_info *cinfo, | ||||||
|  | 				 int (*initiate_commit)(struct nfs_commit_data *data, | ||||||
|  | 							int how)); | ||||||
|  | int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, int max); | ||||||
|  | void pnfs_generic_write_commit_done(struct rpc_task *task, void *data); | ||||||
|  | void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds); | ||||||
|  | struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs, | ||||||
|  | 				      gfp_t gfp_flags); | ||||||
|  | void nfs4_pnfs_v3_ds_connect_unload(void); | ||||||
|  | void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, | ||||||
|  | 			  struct nfs4_deviceid_node *devid, unsigned int timeo, | ||||||
|  | 			  unsigned int retrans, u32 version, u32 minor_version, | ||||||
|  | 			  rpc_authflavor_t au_flavor); | ||||||
|  | struct nfs4_pnfs_ds_addr *nfs4_decode_mp_ds_addr(struct net *net, | ||||||
|  | 						 struct xdr_stream *xdr, | ||||||
|  | 						 gfp_t gfp_flags); | ||||||
|  | 
 | ||||||
| static inline bool nfs_have_layout(struct inode *inode) | static inline bool nfs_have_layout(struct inode *inode) | ||||||
| { | { | ||||||
| 	return NFS_I(inode)->layout != NULL; | 	return NFS_I(inode)->layout != NULL; | ||||||
| @ -287,6 +357,26 @@ nfs4_get_deviceid(struct nfs4_deviceid_node *d) | |||||||
| 	return d; | 	return d; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static inline void pnfs_set_retry_layoutget(struct pnfs_layout_hdr *lo) | ||||||
|  | { | ||||||
|  | 	if (!test_and_set_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags)) | ||||||
|  | 		atomic_inc(&lo->plh_refcount); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline void pnfs_clear_retry_layoutget(struct pnfs_layout_hdr *lo) | ||||||
|  | { | ||||||
|  | 	if (test_and_clear_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags)) { | ||||||
|  | 		atomic_dec(&lo->plh_refcount); | ||||||
|  | 		/* wake up waiters for LAYOUTRETURN as that is not needed */ | ||||||
|  | 		wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline bool pnfs_should_retry_layoutget(struct pnfs_layout_hdr *lo) | ||||||
|  | { | ||||||
|  | 	return test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static inline struct pnfs_layout_segment * | static inline struct pnfs_layout_segment * | ||||||
| pnfs_get_lseg(struct pnfs_layout_segment *lseg) | pnfs_get_lseg(struct pnfs_layout_segment *lseg) | ||||||
| { | { | ||||||
| @ -322,16 +412,22 @@ pnfs_get_ds_info(struct inode *inode) | |||||||
| 	return ld->get_ds_info(inode); | 	return ld->get_ds_info(inode); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static inline void | ||||||
|  | pnfs_generic_mark_devid_invalid(struct nfs4_deviceid_node *node) | ||||||
|  | { | ||||||
|  | 	set_bit(NFS_DEVICEID_INVALID, &node->flags); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static inline bool | static inline bool | ||||||
| pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, | pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, | ||||||
| 			 struct nfs_commit_info *cinfo) | 			 struct nfs_commit_info *cinfo, u32 ds_commit_idx) | ||||||
| { | { | ||||||
| 	struct inode *inode = req->wb_context->dentry->d_inode; | 	struct inode *inode = req->wb_context->dentry->d_inode; | ||||||
| 	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; | 	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; | ||||||
| 
 | 
 | ||||||
| 	if (lseg == NULL || ld->mark_request_commit == NULL) | 	if (lseg == NULL || ld->mark_request_commit == NULL) | ||||||
| 		return false; | 		return false; | ||||||
| 	ld->mark_request_commit(req, lseg, cinfo); | 	ld->mark_request_commit(req, lseg, cinfo, ds_commit_idx); | ||||||
| 	return true; | 	return true; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -357,15 +453,6 @@ pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo, | |||||||
| 		return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(cinfo, max); | 		return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(cinfo, max); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline void |  | ||||||
| pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list, |  | ||||||
| 			 struct nfs_commit_info *cinfo) |  | ||||||
| { |  | ||||||
| 	if (cinfo->ds == NULL || cinfo->ds->nwritten == 0) |  | ||||||
| 		return; |  | ||||||
| 	NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static inline struct nfs_page * | static inline struct nfs_page * | ||||||
| pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo, | pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo, | ||||||
| 			struct page *page) | 			struct page *page) | ||||||
| @ -523,7 +610,7 @@ pnfs_get_ds_info(struct inode *inode) | |||||||
| 
 | 
 | ||||||
| static inline bool | static inline bool | ||||||
| pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, | pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, | ||||||
| 			 struct nfs_commit_info *cinfo) | 			 struct nfs_commit_info *cinfo, u32 ds_commit_idx) | ||||||
| { | { | ||||||
| 	return false; | 	return false; | ||||||
| } | } | ||||||
| @ -541,12 +628,6 @@ pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo, | |||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline void |  | ||||||
| pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list, |  | ||||||
| 			 struct nfs_commit_info *cinfo) |  | ||||||
| { |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static inline struct nfs_page * | static inline struct nfs_page * | ||||||
| pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo, | pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo, | ||||||
| 			struct page *page) | 			struct page *page) | ||||||
| @ -578,6 +659,10 @@ static inline struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) | |||||||
| 	return NULL; | 	return NULL; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static inline void nfs4_pnfs_v3_ds_connect_unload(void) | ||||||
|  | { | ||||||
|  | } | ||||||
|  | 
 | ||||||
| #endif /* CONFIG_NFS_V4_1 */ | #endif /* CONFIG_NFS_V4_1 */ | ||||||
| 
 | 
 | ||||||
| #endif /* FS_NFS_PNFS_H */ | #endif /* FS_NFS_PNFS_H */ | ||||||
|  | |||||||
							
								
								
									
										840
									
								
								fs/nfs/pnfs_nfs.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										840
									
								
								fs/nfs/pnfs_nfs.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,840 @@ | |||||||
|  | /*
 | ||||||
|  |  * Common NFS I/O  operations for the pnfs file based | ||||||
|  |  * layout drivers. | ||||||
|  |  * | ||||||
|  |  * Copyright (c) 2014, Primary Data, Inc. All rights reserved. | ||||||
|  |  * | ||||||
|  |  * Tom Haynes <loghyr@primarydata.com> | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | #include <linux/nfs_fs.h> | ||||||
|  | #include <linux/nfs_page.h> | ||||||
|  | #include <linux/sunrpc/addr.h> | ||||||
|  | #include <linux/module.h> | ||||||
|  | 
 | ||||||
|  | #include "nfs4session.h" | ||||||
|  | #include "internal.h" | ||||||
|  | #include "pnfs.h" | ||||||
|  | 
 | ||||||
|  | #define NFSDBG_FACILITY		NFSDBG_PNFS | ||||||
|  | 
 | ||||||
|  | void pnfs_generic_rw_release(void *data) | ||||||
|  | { | ||||||
|  | 	struct nfs_pgio_header *hdr = data; | ||||||
|  | 
 | ||||||
|  | 	nfs_put_client(hdr->ds_clp); | ||||||
|  | 	hdr->mds_ops->rpc_release(data); | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL_GPL(pnfs_generic_rw_release); | ||||||
|  | 
 | ||||||
|  | /* Fake up some data that will cause nfs_commit_release to retry the writes. */ | ||||||
|  | void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data) | ||||||
|  | { | ||||||
|  | 	struct nfs_page *first = nfs_list_entry(data->pages.next); | ||||||
|  | 
 | ||||||
|  | 	data->task.tk_status = 0; | ||||||
|  | 	memcpy(&data->verf.verifier, &first->wb_verf, | ||||||
|  | 	       sizeof(data->verf.verifier)); | ||||||
|  | 	data->verf.verifier.data[0]++; /* ensure verifier mismatch */ | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL_GPL(pnfs_generic_prepare_to_resend_writes); | ||||||
|  | 
 | ||||||
|  | void pnfs_generic_write_commit_done(struct rpc_task *task, void *data) | ||||||
|  | { | ||||||
|  | 	struct nfs_commit_data *wdata = data; | ||||||
|  | 
 | ||||||
|  | 	/* Note this may cause RPC to be resent */ | ||||||
|  | 	wdata->mds_ops->rpc_call_done(task, data); | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL_GPL(pnfs_generic_write_commit_done); | ||||||
|  | 
 | ||||||
|  | void pnfs_generic_commit_release(void *calldata) | ||||||
|  | { | ||||||
|  | 	struct nfs_commit_data *data = calldata; | ||||||
|  | 
 | ||||||
|  | 	data->completion_ops->completion(data); | ||||||
|  | 	pnfs_put_lseg(data->lseg); | ||||||
|  | 	nfs_put_client(data->ds_clp); | ||||||
|  | 	nfs_commitdata_release(data); | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL_GPL(pnfs_generic_commit_release); | ||||||
|  | 
 | ||||||
|  | /* The generic layer is about to remove the req from the commit list.
 | ||||||
|  |  * If this will make the bucket empty, it will need to put the lseg reference. | ||||||
|  |  * Note this must be called holding the inode (/cinfo) lock | ||||||
|  |  */ | ||||||
|  | void | ||||||
|  | pnfs_generic_clear_request_commit(struct nfs_page *req, | ||||||
|  | 				  struct nfs_commit_info *cinfo) | ||||||
|  | { | ||||||
|  | 	struct pnfs_layout_segment *freeme = NULL; | ||||||
|  | 
 | ||||||
|  | 	if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) | ||||||
|  | 		goto out; | ||||||
|  | 	cinfo->ds->nwritten--; | ||||||
|  | 	if (list_is_singular(&req->wb_list)) { | ||||||
|  | 		struct pnfs_commit_bucket *bucket; | ||||||
|  | 
 | ||||||
|  | 		bucket = list_first_entry(&req->wb_list, | ||||||
|  | 					  struct pnfs_commit_bucket, | ||||||
|  | 					  written); | ||||||
|  | 		freeme = bucket->wlseg; | ||||||
|  | 		bucket->wlseg = NULL; | ||||||
|  | 	} | ||||||
|  | out: | ||||||
|  | 	nfs_request_remove_commit_list(req, cinfo); | ||||||
|  | 	pnfs_put_lseg_locked(freeme); | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit); | ||||||
|  | 
 | ||||||
|  | static int | ||||||
|  | pnfs_generic_transfer_commit_list(struct list_head *src, struct list_head *dst, | ||||||
|  | 				  struct nfs_commit_info *cinfo, int max) | ||||||
|  | { | ||||||
|  | 	struct nfs_page *req, *tmp; | ||||||
|  | 	int ret = 0; | ||||||
|  | 
 | ||||||
|  | 	list_for_each_entry_safe(req, tmp, src, wb_list) { | ||||||
|  | 		if (!nfs_lock_request(req)) | ||||||
|  | 			continue; | ||||||
|  | 		kref_get(&req->wb_kref); | ||||||
|  | 		if (cond_resched_lock(cinfo->lock)) | ||||||
|  | 			list_safe_reset_next(req, tmp, wb_list); | ||||||
|  | 		nfs_request_remove_commit_list(req, cinfo); | ||||||
|  | 		clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); | ||||||
|  | 		nfs_list_add_request(req, dst); | ||||||
|  | 		ret++; | ||||||
|  | 		if ((ret == max) && !cinfo->dreq) | ||||||
|  | 			break; | ||||||
|  | 	} | ||||||
|  | 	return ret; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static int | ||||||
|  | pnfs_generic_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, | ||||||
|  | 				 struct nfs_commit_info *cinfo, | ||||||
|  | 				 int max) | ||||||
|  | { | ||||||
|  | 	struct list_head *src = &bucket->written; | ||||||
|  | 	struct list_head *dst = &bucket->committing; | ||||||
|  | 	int ret; | ||||||
|  | 
 | ||||||
|  | 	lockdep_assert_held(cinfo->lock); | ||||||
|  | 	ret = pnfs_generic_transfer_commit_list(src, dst, cinfo, max); | ||||||
|  | 	if (ret) { | ||||||
|  | 		cinfo->ds->nwritten -= ret; | ||||||
|  | 		cinfo->ds->ncommitting += ret; | ||||||
|  | 		bucket->clseg = bucket->wlseg; | ||||||
|  | 		if (list_empty(src)) | ||||||
|  | 			bucket->wlseg = NULL; | ||||||
|  | 		else | ||||||
|  | 			pnfs_get_lseg(bucket->clseg); | ||||||
|  | 	} | ||||||
|  | 	return ret; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* Move reqs from written to committing lists, returning count
 | ||||||
|  |  * of number moved. | ||||||
|  |  */ | ||||||
|  | int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, | ||||||
|  | 				   int max) | ||||||
|  | { | ||||||
|  | 	int i, rv = 0, cnt; | ||||||
|  | 
 | ||||||
|  | 	lockdep_assert_held(cinfo->lock); | ||||||
|  | 	for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) { | ||||||
|  | 		cnt = pnfs_generic_scan_ds_commit_list(&cinfo->ds->buckets[i], | ||||||
|  | 						       cinfo, max); | ||||||
|  | 		max -= cnt; | ||||||
|  | 		rv += cnt; | ||||||
|  | 	} | ||||||
|  | 	return rv; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL_GPL(pnfs_generic_scan_commit_lists); | ||||||
|  | 
 | ||||||
|  | /* Pull everything off the committing lists and dump into @dst.  */ | ||||||
|  | void pnfs_generic_recover_commit_reqs(struct list_head *dst, | ||||||
|  | 				      struct nfs_commit_info *cinfo) | ||||||
|  | { | ||||||
|  | 	struct pnfs_commit_bucket *b; | ||||||
|  | 	struct pnfs_layout_segment *freeme; | ||||||
|  | 	int i; | ||||||
|  | 
 | ||||||
|  | 	lockdep_assert_held(cinfo->lock); | ||||||
|  | restart: | ||||||
|  | 	for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { | ||||||
|  | 		if (pnfs_generic_transfer_commit_list(&b->written, dst, | ||||||
|  | 						      cinfo, 0)) { | ||||||
|  | 			freeme = b->wlseg; | ||||||
|  | 			b->wlseg = NULL; | ||||||
|  | 			spin_unlock(cinfo->lock); | ||||||
|  | 			pnfs_put_lseg(freeme); | ||||||
|  | 			spin_lock(cinfo->lock); | ||||||
|  | 			goto restart; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	cinfo->ds->nwritten = 0; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs); | ||||||
|  | 
 | ||||||
|  | static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx) | ||||||
|  | { | ||||||
|  | 	struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; | ||||||
|  | 	struct pnfs_commit_bucket *bucket; | ||||||
|  | 	struct pnfs_layout_segment *freeme; | ||||||
|  | 	int i; | ||||||
|  | 
 | ||||||
|  | 	for (i = idx; i < fl_cinfo->nbuckets; i++) { | ||||||
|  | 		bucket = &fl_cinfo->buckets[i]; | ||||||
|  | 		if (list_empty(&bucket->committing)) | ||||||
|  | 			continue; | ||||||
|  | 		nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo, i); | ||||||
|  | 		spin_lock(cinfo->lock); | ||||||
|  | 		freeme = bucket->clseg; | ||||||
|  | 		bucket->clseg = NULL; | ||||||
|  | 		spin_unlock(cinfo->lock); | ||||||
|  | 		pnfs_put_lseg(freeme); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static unsigned int | ||||||
|  | pnfs_generic_alloc_ds_commits(struct nfs_commit_info *cinfo, | ||||||
|  | 			      struct list_head *list) | ||||||
|  | { | ||||||
|  | 	struct pnfs_ds_commit_info *fl_cinfo; | ||||||
|  | 	struct pnfs_commit_bucket *bucket; | ||||||
|  | 	struct nfs_commit_data *data; | ||||||
|  | 	int i; | ||||||
|  | 	unsigned int nreq = 0; | ||||||
|  | 
 | ||||||
|  | 	fl_cinfo = cinfo->ds; | ||||||
|  | 	bucket = fl_cinfo->buckets; | ||||||
|  | 	for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) { | ||||||
|  | 		if (list_empty(&bucket->committing)) | ||||||
|  | 			continue; | ||||||
|  | 		data = nfs_commitdata_alloc(); | ||||||
|  | 		if (!data) | ||||||
|  | 			break; | ||||||
|  | 		data->ds_commit_index = i; | ||||||
|  | 		spin_lock(cinfo->lock); | ||||||
|  | 		data->lseg = bucket->clseg; | ||||||
|  | 		bucket->clseg = NULL; | ||||||
|  | 		spin_unlock(cinfo->lock); | ||||||
|  | 		list_add(&data->pages, list); | ||||||
|  | 		nreq++; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	/* Clean up on error */ | ||||||
|  | 	pnfs_generic_retry_commit(cinfo, i); | ||||||
|  | 	return nreq; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* This follows nfs_commit_list pretty closely */ | ||||||
|  | int | ||||||
|  | pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, | ||||||
|  | 			     int how, struct nfs_commit_info *cinfo, | ||||||
|  | 			     int (*initiate_commit)(struct nfs_commit_data *data, | ||||||
|  | 						    int how)) | ||||||
|  | { | ||||||
|  | 	struct nfs_commit_data *data, *tmp; | ||||||
|  | 	LIST_HEAD(list); | ||||||
|  | 	unsigned int nreq = 0; | ||||||
|  | 
 | ||||||
|  | 	if (!list_empty(mds_pages)) { | ||||||
|  | 		data = nfs_commitdata_alloc(); | ||||||
|  | 		if (data != NULL) { | ||||||
|  | 			data->lseg = NULL; | ||||||
|  | 			list_add(&data->pages, &list); | ||||||
|  | 			nreq++; | ||||||
|  | 		} else { | ||||||
|  | 			nfs_retry_commit(mds_pages, NULL, cinfo, 0); | ||||||
|  | 			pnfs_generic_retry_commit(cinfo, 0); | ||||||
|  | 			cinfo->completion_ops->error_cleanup(NFS_I(inode)); | ||||||
|  | 			return -ENOMEM; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	nreq += pnfs_generic_alloc_ds_commits(cinfo, &list); | ||||||
|  | 
 | ||||||
|  | 	if (nreq == 0) { | ||||||
|  | 		cinfo->completion_ops->error_cleanup(NFS_I(inode)); | ||||||
|  | 		goto out; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	atomic_add(nreq, &cinfo->mds->rpcs_out); | ||||||
|  | 
 | ||||||
|  | 	list_for_each_entry_safe(data, tmp, &list, pages) { | ||||||
|  | 		list_del_init(&data->pages); | ||||||
|  | 		if (!data->lseg) { | ||||||
|  | 			nfs_init_commit(data, mds_pages, NULL, cinfo); | ||||||
|  | 			nfs_initiate_commit(NFS_CLIENT(inode), data, | ||||||
|  | 					    NFS_PROTO(data->inode), | ||||||
|  | 					    data->mds_ops, how, 0); | ||||||
|  | 		} else { | ||||||
|  | 			struct pnfs_commit_bucket *buckets; | ||||||
|  | 
 | ||||||
|  | 			buckets = cinfo->ds->buckets; | ||||||
|  | 			nfs_init_commit(data, | ||||||
|  | 					&buckets[data->ds_commit_index].committing, | ||||||
|  | 					data->lseg, | ||||||
|  | 					cinfo); | ||||||
|  | 			initiate_commit(data, how); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | out: | ||||||
|  | 	cinfo->ds->ncommitting = 0; | ||||||
|  | 	return PNFS_ATTEMPTED; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL_GPL(pnfs_generic_commit_pagelist); | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Data server cache | ||||||
|  |  * | ||||||
|  |  * Data servers can be mapped to different device ids. | ||||||
|  |  * nfs4_pnfs_ds reference counting | ||||||
|  |  *   - set to 1 on allocation | ||||||
|  |  *   - incremented when a device id maps a data server already in the cache. | ||||||
|  |  *   - decremented when deviceid is removed from the cache. | ||||||
|  |  */ | ||||||
|  | static DEFINE_SPINLOCK(nfs4_ds_cache_lock); | ||||||
|  | static LIST_HEAD(nfs4_data_server_cache); | ||||||
|  | 
 | ||||||
|  | /* Debug routines */ | ||||||
|  | static void | ||||||
|  | print_ds(struct nfs4_pnfs_ds *ds) | ||||||
|  | { | ||||||
|  | 	if (ds == NULL) { | ||||||
|  | 		printk(KERN_WARNING "%s NULL device\n", __func__); | ||||||
|  | 		return; | ||||||
|  | 	} | ||||||
|  | 	printk(KERN_WARNING "        ds %s\n" | ||||||
|  | 		"        ref count %d\n" | ||||||
|  | 		"        client %p\n" | ||||||
|  | 		"        cl_exchange_flags %x\n", | ||||||
|  | 		ds->ds_remotestr, | ||||||
|  | 		atomic_read(&ds->ds_count), ds->ds_clp, | ||||||
|  | 		ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static bool | ||||||
|  | same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) | ||||||
|  | { | ||||||
|  | 	struct sockaddr_in *a, *b; | ||||||
|  | 	struct sockaddr_in6 *a6, *b6; | ||||||
|  | 
 | ||||||
|  | 	if (addr1->sa_family != addr2->sa_family) | ||||||
|  | 		return false; | ||||||
|  | 
 | ||||||
|  | 	switch (addr1->sa_family) { | ||||||
|  | 	case AF_INET: | ||||||
|  | 		a = (struct sockaddr_in *)addr1; | ||||||
|  | 		b = (struct sockaddr_in *)addr2; | ||||||
|  | 
 | ||||||
|  | 		if (a->sin_addr.s_addr == b->sin_addr.s_addr && | ||||||
|  | 		    a->sin_port == b->sin_port) | ||||||
|  | 			return true; | ||||||
|  | 		break; | ||||||
|  | 
 | ||||||
|  | 	case AF_INET6: | ||||||
|  | 		a6 = (struct sockaddr_in6 *)addr1; | ||||||
|  | 		b6 = (struct sockaddr_in6 *)addr2; | ||||||
|  | 
 | ||||||
|  | 		/* LINKLOCAL addresses must have matching scope_id */ | ||||||
|  | 		if (ipv6_addr_src_scope(&a6->sin6_addr) == | ||||||
|  | 		    IPV6_ADDR_SCOPE_LINKLOCAL && | ||||||
|  | 		    a6->sin6_scope_id != b6->sin6_scope_id) | ||||||
|  | 			return false; | ||||||
|  | 
 | ||||||
|  | 		if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) && | ||||||
|  | 		    a6->sin6_port == b6->sin6_port) | ||||||
|  | 			return true; | ||||||
|  | 		break; | ||||||
|  | 
 | ||||||
|  | 	default: | ||||||
|  | 		dprintk("%s: unhandled address family: %u\n", | ||||||
|  | 			__func__, addr1->sa_family); | ||||||
|  | 		return false; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return false; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static bool | ||||||
|  | _same_data_server_addrs_locked(const struct list_head *dsaddrs1, | ||||||
|  | 			       const struct list_head *dsaddrs2) | ||||||
|  | { | ||||||
|  | 	struct nfs4_pnfs_ds_addr *da1, *da2; | ||||||
|  | 
 | ||||||
|  | 	/* step through both lists, comparing as we go */ | ||||||
|  | 	for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node), | ||||||
|  | 	     da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node); | ||||||
|  | 	     da1 != NULL && da2 != NULL; | ||||||
|  | 	     da1 = list_entry(da1->da_node.next, typeof(*da1), da_node), | ||||||
|  | 	     da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) { | ||||||
|  | 		if (!same_sockaddr((struct sockaddr *)&da1->da_addr, | ||||||
|  | 				   (struct sockaddr *)&da2->da_addr)) | ||||||
|  | 			return false; | ||||||
|  | 	} | ||||||
|  | 	if (da1 == NULL && da2 == NULL) | ||||||
|  | 		return true; | ||||||
|  | 
 | ||||||
|  | 	return false; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Lookup DS by addresses.  nfs4_ds_cache_lock is held | ||||||
|  |  */ | ||||||
|  | static struct nfs4_pnfs_ds * | ||||||
|  | _data_server_lookup_locked(const struct list_head *dsaddrs) | ||||||
|  | { | ||||||
|  | 	struct nfs4_pnfs_ds *ds; | ||||||
|  | 
 | ||||||
|  | 	list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) | ||||||
|  | 		if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs)) | ||||||
|  | 			return ds; | ||||||
|  | 	return NULL; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void destroy_ds(struct nfs4_pnfs_ds *ds) | ||||||
|  | { | ||||||
|  | 	struct nfs4_pnfs_ds_addr *da; | ||||||
|  | 
 | ||||||
|  | 	dprintk("--> %s\n", __func__); | ||||||
|  | 	ifdebug(FACILITY) | ||||||
|  | 		print_ds(ds); | ||||||
|  | 
 | ||||||
|  | 	nfs_put_client(ds->ds_clp); | ||||||
|  | 
 | ||||||
|  | 	while (!list_empty(&ds->ds_addrs)) { | ||||||
|  | 		da = list_first_entry(&ds->ds_addrs, | ||||||
|  | 				      struct nfs4_pnfs_ds_addr, | ||||||
|  | 				      da_node); | ||||||
|  | 		list_del_init(&da->da_node); | ||||||
|  | 		kfree(da->da_remotestr); | ||||||
|  | 		kfree(da); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	kfree(ds->ds_remotestr); | ||||||
|  | 	kfree(ds); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds) | ||||||
|  | { | ||||||
|  | 	if (atomic_dec_and_lock(&ds->ds_count, | ||||||
|  | 				&nfs4_ds_cache_lock)) { | ||||||
|  | 		list_del_init(&ds->ds_node); | ||||||
|  | 		spin_unlock(&nfs4_ds_cache_lock); | ||||||
|  | 		destroy_ds(ds); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_put); | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Create a string with a human readable address and port to avoid | ||||||
|  |  * complicated setup around many dprinks. | ||||||
|  |  */ | ||||||
|  | static char * | ||||||
|  | nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags) | ||||||
|  | { | ||||||
|  | 	struct nfs4_pnfs_ds_addr *da; | ||||||
|  | 	char *remotestr; | ||||||
|  | 	size_t len; | ||||||
|  | 	char *p; | ||||||
|  | 
 | ||||||
|  | 	len = 3;        /* '{', '}' and eol */ | ||||||
|  | 	list_for_each_entry(da, dsaddrs, da_node) { | ||||||
|  | 		len += strlen(da->da_remotestr) + 1;    /* string plus comma */ | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	remotestr = kzalloc(len, gfp_flags); | ||||||
|  | 	if (!remotestr) | ||||||
|  | 		return NULL; | ||||||
|  | 
 | ||||||
|  | 	p = remotestr; | ||||||
|  | 	*(p++) = '{'; | ||||||
|  | 	len--; | ||||||
|  | 	list_for_each_entry(da, dsaddrs, da_node) { | ||||||
|  | 		size_t ll = strlen(da->da_remotestr); | ||||||
|  | 
 | ||||||
|  | 		if (ll > len) | ||||||
|  | 			goto out_err; | ||||||
|  | 
 | ||||||
|  | 		memcpy(p, da->da_remotestr, ll); | ||||||
|  | 		p += ll; | ||||||
|  | 		len -= ll; | ||||||
|  | 
 | ||||||
|  | 		if (len < 1) | ||||||
|  | 			goto out_err; | ||||||
|  | 		(*p++) = ','; | ||||||
|  | 		len--; | ||||||
|  | 	} | ||||||
|  | 	if (len < 2) | ||||||
|  | 		goto out_err; | ||||||
|  | 	*(p++) = '}'; | ||||||
|  | 	*p = '\0'; | ||||||
|  | 	return remotestr; | ||||||
|  | out_err: | ||||||
|  | 	kfree(remotestr); | ||||||
|  | 	return NULL; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Given a list of multipath struct nfs4_pnfs_ds_addr, add it to ds cache if | ||||||
|  |  * uncached and return cached struct nfs4_pnfs_ds. | ||||||
|  |  */ | ||||||
|  | struct nfs4_pnfs_ds * | ||||||
|  | nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) | ||||||
|  | { | ||||||
|  | 	struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; | ||||||
|  | 	char *remotestr; | ||||||
|  | 
 | ||||||
|  | 	if (list_empty(dsaddrs)) { | ||||||
|  | 		dprintk("%s: no addresses defined\n", __func__); | ||||||
|  | 		goto out; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	ds = kzalloc(sizeof(*ds), gfp_flags); | ||||||
|  | 	if (!ds) | ||||||
|  | 		goto out; | ||||||
|  | 
 | ||||||
|  | 	/* this is only used for debugging, so it's ok if its NULL */ | ||||||
|  | 	remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags); | ||||||
|  | 
 | ||||||
|  | 	spin_lock(&nfs4_ds_cache_lock); | ||||||
|  | 	tmp_ds = _data_server_lookup_locked(dsaddrs); | ||||||
|  | 	if (tmp_ds == NULL) { | ||||||
|  | 		INIT_LIST_HEAD(&ds->ds_addrs); | ||||||
|  | 		list_splice_init(dsaddrs, &ds->ds_addrs); | ||||||
|  | 		ds->ds_remotestr = remotestr; | ||||||
|  | 		atomic_set(&ds->ds_count, 1); | ||||||
|  | 		INIT_LIST_HEAD(&ds->ds_node); | ||||||
|  | 		ds->ds_clp = NULL; | ||||||
|  | 		list_add(&ds->ds_node, &nfs4_data_server_cache); | ||||||
|  | 		dprintk("%s add new data server %s\n", __func__, | ||||||
|  | 			ds->ds_remotestr); | ||||||
|  | 	} else { | ||||||
|  | 		kfree(remotestr); | ||||||
|  | 		kfree(ds); | ||||||
|  | 		atomic_inc(&tmp_ds->ds_count); | ||||||
|  | 		dprintk("%s data server %s found, inc'ed ds_count to %d\n", | ||||||
|  | 			__func__, tmp_ds->ds_remotestr, | ||||||
|  | 			atomic_read(&tmp_ds->ds_count)); | ||||||
|  | 		ds = tmp_ds; | ||||||
|  | 	} | ||||||
|  | 	spin_unlock(&nfs4_ds_cache_lock); | ||||||
|  | out: | ||||||
|  | 	return ds; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_add); | ||||||
|  | 
 | ||||||
/*
 * Sleep until the NFS4DS_CONNECTING bit in ds->ds_state is cleared.
 *
 * NOTE(review): the wait uses TASK_KILLABLE and the result of wait_on_bit()
 * is discarded, so this may presumably return before the bit is cleared if
 * the wait is interrupted - confirm callers re-check the connection state.
 */
static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
{
	might_sleep();
	wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING,
			TASK_KILLABLE);
}
|  | 
 | ||||||
/*
 * Clear the NFS4DS_CONNECTING bit in ds->ds_state and wake anyone waiting
 * on it.  The clear is bracketed by memory barriers; the wake-up is issued
 * only after the barrier that follows the clear.
 */
static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
{
	smp_mb__before_atomic();
	clear_bit(NFS4DS_CONNECTING, &ds->ds_state);
	smp_mb__after_atomic();
	wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING);
}
|  | 
 | ||||||
|  | static struct nfs_client *(*get_v3_ds_connect)( | ||||||
|  | 			struct nfs_client *mds_clp, | ||||||
|  | 			const struct sockaddr *ds_addr, | ||||||
|  | 			int ds_addrlen, | ||||||
|  | 			int ds_proto, | ||||||
|  | 			unsigned int ds_timeo, | ||||||
|  | 			unsigned int ds_retrans, | ||||||
|  | 			rpc_authflavor_t au_flavor); | ||||||
|  | 
 | ||||||
|  | static bool load_v3_ds_connect(void) | ||||||
|  | { | ||||||
|  | 	if (!get_v3_ds_connect) { | ||||||
|  | 		get_v3_ds_connect = symbol_request(nfs3_set_ds_client); | ||||||
|  | 		WARN_ON_ONCE(!get_v3_ds_connect); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return(get_v3_ds_connect != NULL); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void __exit nfs4_pnfs_v3_ds_connect_unload(void) | ||||||
|  | { | ||||||
|  | 	if (get_v3_ds_connect) { | ||||||
|  | 		symbol_put(nfs3_set_ds_client); | ||||||
|  | 		get_v3_ds_connect = NULL; | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL_GPL(nfs4_pnfs_v3_ds_connect_unload); | ||||||
|  | 
 | ||||||
|  | static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, | ||||||
|  | 				 struct nfs4_pnfs_ds *ds, | ||||||
|  | 				 unsigned int timeo, | ||||||
|  | 				 unsigned int retrans, | ||||||
|  | 				 rpc_authflavor_t au_flavor) | ||||||
|  | { | ||||||
|  | 	struct nfs_client *clp = ERR_PTR(-EIO); | ||||||
|  | 	struct nfs4_pnfs_ds_addr *da; | ||||||
|  | 	int status = 0; | ||||||
|  | 
 | ||||||
|  | 	dprintk("--> %s DS %s au_flavor %d\n", __func__, | ||||||
|  | 		ds->ds_remotestr, au_flavor); | ||||||
|  | 
 | ||||||
|  | 	if (!load_v3_ds_connect()) | ||||||
|  | 		goto out; | ||||||
|  | 
 | ||||||
|  | 	list_for_each_entry(da, &ds->ds_addrs, da_node) { | ||||||
|  | 		dprintk("%s: DS %s: trying address %s\n", | ||||||
|  | 			__func__, ds->ds_remotestr, da->da_remotestr); | ||||||
|  | 
 | ||||||
|  | 		clp = get_v3_ds_connect(mds_srv->nfs_client, | ||||||
|  | 					(struct sockaddr *)&da->da_addr, | ||||||
|  | 					da->da_addrlen, IPPROTO_TCP, | ||||||
|  | 					timeo, retrans, au_flavor); | ||||||
|  | 		if (!IS_ERR(clp)) | ||||||
|  | 			break; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if (IS_ERR(clp)) { | ||||||
|  | 		status = PTR_ERR(clp); | ||||||
|  | 		goto out; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	smp_wmb(); | ||||||
|  | 	ds->ds_clp = clp; | ||||||
|  | 	dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); | ||||||
|  | out: | ||||||
|  | 	return status; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, | ||||||
|  | 				 struct nfs4_pnfs_ds *ds, | ||||||
|  | 				 unsigned int timeo, | ||||||
|  | 				 unsigned int retrans, | ||||||
|  | 				 u32 minor_version, | ||||||
|  | 				 rpc_authflavor_t au_flavor) | ||||||
|  | { | ||||||
|  | 	struct nfs_client *clp = ERR_PTR(-EIO); | ||||||
|  | 	struct nfs4_pnfs_ds_addr *da; | ||||||
|  | 	int status = 0; | ||||||
|  | 
 | ||||||
|  | 	dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr, | ||||||
|  | 		au_flavor); | ||||||
|  | 
 | ||||||
|  | 	list_for_each_entry(da, &ds->ds_addrs, da_node) { | ||||||
|  | 		dprintk("%s: DS %s: trying address %s\n", | ||||||
|  | 			__func__, ds->ds_remotestr, da->da_remotestr); | ||||||
|  | 
 | ||||||
|  | 		clp = nfs4_set_ds_client(mds_srv->nfs_client, | ||||||
|  | 					(struct sockaddr *)&da->da_addr, | ||||||
|  | 					da->da_addrlen, IPPROTO_TCP, | ||||||
|  | 					timeo, retrans, minor_version, | ||||||
|  | 					au_flavor); | ||||||
|  | 		if (!IS_ERR(clp)) | ||||||
|  | 			break; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if (IS_ERR(clp)) { | ||||||
|  | 		status = PTR_ERR(clp); | ||||||
|  | 		goto out; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time); | ||||||
|  | 	if (status) | ||||||
|  | 		goto out_put; | ||||||
|  | 
 | ||||||
|  | 	smp_wmb(); | ||||||
|  | 	ds->ds_clp = clp; | ||||||
|  | 	dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); | ||||||
|  | out: | ||||||
|  | 	return status; | ||||||
|  | out_put: | ||||||
|  | 	nfs_put_client(clp); | ||||||
|  | 	goto out; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Create an rpc connection to the nfs4_pnfs_ds data server. | ||||||
|  |  * Currently only supports IPv4 and IPv6 addresses. | ||||||
|  |  * If connection fails, make devid unavailable. | ||||||
|  |  */ | ||||||
|  | void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, | ||||||
|  | 			  struct nfs4_deviceid_node *devid, unsigned int timeo, | ||||||
|  | 			  unsigned int retrans, u32 version, | ||||||
|  | 			  u32 minor_version, rpc_authflavor_t au_flavor) | ||||||
|  | { | ||||||
|  | 	if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { | ||||||
|  | 		int err = 0; | ||||||
|  | 
 | ||||||
|  | 		if (version == 3) { | ||||||
|  | 			err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo, | ||||||
|  | 						       retrans, au_flavor); | ||||||
|  | 		} else if (version == 4) { | ||||||
|  | 			err = _nfs4_pnfs_v4_ds_connect(mds_srv, ds, timeo, | ||||||
|  | 						       retrans, minor_version, | ||||||
|  | 						       au_flavor); | ||||||
|  | 		} else { | ||||||
|  | 			dprintk("%s: unsupported DS version %d\n", __func__, | ||||||
|  | 				version); | ||||||
|  | 			err = -EPROTONOSUPPORT; | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		if (err) | ||||||
|  | 			nfs4_mark_deviceid_unavailable(devid); | ||||||
|  | 		nfs4_clear_ds_conn_bit(ds); | ||||||
|  | 	} else { | ||||||
|  | 		nfs4_wait_ds_connect(ds); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect); | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Currently only supports ipv4, ipv6 and one multi-path address. | ||||||
|  |  */ | ||||||
|  | struct nfs4_pnfs_ds_addr * | ||||||
|  | nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags) | ||||||
|  | { | ||||||
|  | 	struct nfs4_pnfs_ds_addr *da = NULL; | ||||||
|  | 	char *buf, *portstr; | ||||||
|  | 	__be16 port; | ||||||
|  | 	int nlen, rlen; | ||||||
|  | 	int tmp[2]; | ||||||
|  | 	__be32 *p; | ||||||
|  | 	char *netid, *match_netid; | ||||||
|  | 	size_t len, match_netid_len; | ||||||
|  | 	char *startsep = ""; | ||||||
|  | 	char *endsep = ""; | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 	/* r_netid */ | ||||||
|  | 	p = xdr_inline_decode(xdr, 4); | ||||||
|  | 	if (unlikely(!p)) | ||||||
|  | 		goto out_err; | ||||||
|  | 	nlen = be32_to_cpup(p++); | ||||||
|  | 
 | ||||||
|  | 	p = xdr_inline_decode(xdr, nlen); | ||||||
|  | 	if (unlikely(!p)) | ||||||
|  | 		goto out_err; | ||||||
|  | 
 | ||||||
|  | 	netid = kmalloc(nlen+1, gfp_flags); | ||||||
|  | 	if (unlikely(!netid)) | ||||||
|  | 		goto out_err; | ||||||
|  | 
 | ||||||
|  | 	netid[nlen] = '\0'; | ||||||
|  | 	memcpy(netid, p, nlen); | ||||||
|  | 
 | ||||||
|  | 	/* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */ | ||||||
|  | 	p = xdr_inline_decode(xdr, 4); | ||||||
|  | 	if (unlikely(!p)) | ||||||
|  | 		goto out_free_netid; | ||||||
|  | 	rlen = be32_to_cpup(p); | ||||||
|  | 
 | ||||||
|  | 	p = xdr_inline_decode(xdr, rlen); | ||||||
|  | 	if (unlikely(!p)) | ||||||
|  | 		goto out_free_netid; | ||||||
|  | 
 | ||||||
|  | 	/* port is ".ABC.DEF", 8 chars max */ | ||||||
|  | 	if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) { | ||||||
|  | 		dprintk("%s: Invalid address, length %d\n", __func__, | ||||||
|  | 			rlen); | ||||||
|  | 		goto out_free_netid; | ||||||
|  | 	} | ||||||
|  | 	buf = kmalloc(rlen + 1, gfp_flags); | ||||||
|  | 	if (!buf) { | ||||||
|  | 		dprintk("%s: Not enough memory\n", __func__); | ||||||
|  | 		goto out_free_netid; | ||||||
|  | 	} | ||||||
|  | 	buf[rlen] = '\0'; | ||||||
|  | 	memcpy(buf, p, rlen); | ||||||
|  | 
 | ||||||
|  | 	/* replace port '.' with '-' */ | ||||||
|  | 	portstr = strrchr(buf, '.'); | ||||||
|  | 	if (!portstr) { | ||||||
|  | 		dprintk("%s: Failed finding expected dot in port\n", | ||||||
|  | 			__func__); | ||||||
|  | 		goto out_free_buf; | ||||||
|  | 	} | ||||||
|  | 	*portstr = '-'; | ||||||
|  | 
 | ||||||
|  | 	/* find '.' between address and port */ | ||||||
|  | 	portstr = strrchr(buf, '.'); | ||||||
|  | 	if (!portstr) { | ||||||
|  | 		dprintk("%s: Failed finding expected dot between address and " | ||||||
|  | 			"port\n", __func__); | ||||||
|  | 		goto out_free_buf; | ||||||
|  | 	} | ||||||
|  | 	*portstr = '\0'; | ||||||
|  | 
 | ||||||
|  | 	da = kzalloc(sizeof(*da), gfp_flags); | ||||||
|  | 	if (unlikely(!da)) | ||||||
|  | 		goto out_free_buf; | ||||||
|  | 
 | ||||||
|  | 	INIT_LIST_HEAD(&da->da_node); | ||||||
|  | 
 | ||||||
|  | 	if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr, | ||||||
|  | 		      sizeof(da->da_addr))) { | ||||||
|  | 		dprintk("%s: error parsing address %s\n", __func__, buf); | ||||||
|  | 		goto out_free_da; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	portstr++; | ||||||
|  | 	sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]); | ||||||
|  | 	port = htons((tmp[0] << 8) | (tmp[1])); | ||||||
|  | 
 | ||||||
|  | 	switch (da->da_addr.ss_family) { | ||||||
|  | 	case AF_INET: | ||||||
|  | 		((struct sockaddr_in *)&da->da_addr)->sin_port = port; | ||||||
|  | 		da->da_addrlen = sizeof(struct sockaddr_in); | ||||||
|  | 		match_netid = "tcp"; | ||||||
|  | 		match_netid_len = 3; | ||||||
|  | 		break; | ||||||
|  | 
 | ||||||
|  | 	case AF_INET6: | ||||||
|  | 		((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port; | ||||||
|  | 		da->da_addrlen = sizeof(struct sockaddr_in6); | ||||||
|  | 		match_netid = "tcp6"; | ||||||
|  | 		match_netid_len = 4; | ||||||
|  | 		startsep = "["; | ||||||
|  | 		endsep = "]"; | ||||||
|  | 		break; | ||||||
|  | 
 | ||||||
|  | 	default: | ||||||
|  | 		dprintk("%s: unsupported address family: %u\n", | ||||||
|  | 			__func__, da->da_addr.ss_family); | ||||||
|  | 		goto out_free_da; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) { | ||||||
|  | 		dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n", | ||||||
|  | 			__func__, netid, match_netid); | ||||||
|  | 		goto out_free_da; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	/* save human readable address */ | ||||||
|  | 	len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7; | ||||||
|  | 	da->da_remotestr = kzalloc(len, gfp_flags); | ||||||
|  | 
 | ||||||
|  | 	/* NULL is ok, only used for dprintk */ | ||||||
|  | 	if (da->da_remotestr) | ||||||
|  | 		snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep, | ||||||
|  | 			 buf, endsep, ntohs(port)); | ||||||
|  | 
 | ||||||
|  | 	dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr); | ||||||
|  | 	kfree(buf); | ||||||
|  | 	kfree(netid); | ||||||
|  | 	return da; | ||||||
|  | 
 | ||||||
|  | out_free_da: | ||||||
|  | 	kfree(da); | ||||||
|  | out_free_buf: | ||||||
|  | 	dprintk("%s: Error parsing DS addr: %s\n", __func__, buf); | ||||||
|  | 	kfree(buf); | ||||||
|  | out_free_netid: | ||||||
|  | 	kfree(netid); | ||||||
|  | out_err: | ||||||
|  | 	return NULL; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL_GPL(nfs4_decode_mp_ds_addr); | ||||||
| @ -70,8 +70,15 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init_read); | |||||||
| 
 | 
 | ||||||
| void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) | void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) | ||||||
| { | { | ||||||
|  | 	struct nfs_pgio_mirror *mirror; | ||||||
|  | 
 | ||||||
| 	pgio->pg_ops = &nfs_pgio_rw_ops; | 	pgio->pg_ops = &nfs_pgio_rw_ops; | ||||||
| 	pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; | 
 | ||||||
|  | 	/* read path should never have more than one mirror */ | ||||||
|  | 	WARN_ON_ONCE(pgio->pg_mirror_count != 1); | ||||||
|  | 
 | ||||||
|  | 	mirror = &pgio->pg_mirrors[0]; | ||||||
|  | 	mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; | ||||||
| } | } | ||||||
| EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); | EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); | ||||||
| 
 | 
 | ||||||
| @ -81,6 +88,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, | |||||||
| 	struct nfs_page	*new; | 	struct nfs_page	*new; | ||||||
| 	unsigned int len; | 	unsigned int len; | ||||||
| 	struct nfs_pageio_descriptor pgio; | 	struct nfs_pageio_descriptor pgio; | ||||||
|  | 	struct nfs_pgio_mirror *pgm; | ||||||
| 
 | 
 | ||||||
| 	len = nfs_page_length(page); | 	len = nfs_page_length(page); | ||||||
| 	if (len == 0) | 	if (len == 0) | ||||||
| @ -97,7 +105,13 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, | |||||||
| 			     &nfs_async_read_completion_ops); | 			     &nfs_async_read_completion_ops); | ||||||
| 	nfs_pageio_add_request(&pgio, new); | 	nfs_pageio_add_request(&pgio, new); | ||||||
| 	nfs_pageio_complete(&pgio); | 	nfs_pageio_complete(&pgio); | ||||||
| 	NFS_I(inode)->read_io += pgio.pg_bytes_written; | 
 | ||||||
|  | 	/* It doesn't make sense to do mirrored reads! */ | ||||||
|  | 	WARN_ON_ONCE(pgio.pg_mirror_count != 1); | ||||||
|  | 
 | ||||||
|  | 	pgm = &pgio.pg_mirrors[0]; | ||||||
|  | 	NFS_I(inode)->read_io += pgm->pg_bytes_written; | ||||||
|  | 
 | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -168,13 +182,14 @@ out: | |||||||
| 
 | 
 | ||||||
| static void nfs_initiate_read(struct nfs_pgio_header *hdr, | static void nfs_initiate_read(struct nfs_pgio_header *hdr, | ||||||
| 			      struct rpc_message *msg, | 			      struct rpc_message *msg, | ||||||
|  | 			      const struct nfs_rpc_ops *rpc_ops, | ||||||
| 			      struct rpc_task_setup *task_setup_data, int how) | 			      struct rpc_task_setup *task_setup_data, int how) | ||||||
| { | { | ||||||
| 	struct inode *inode = hdr->inode; | 	struct inode *inode = hdr->inode; | ||||||
| 	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; | 	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; | ||||||
| 
 | 
 | ||||||
| 	task_setup_data->flags |= swap_flags; | 	task_setup_data->flags |= swap_flags; | ||||||
| 	NFS_PROTO(inode)->read_setup(hdr, msg); | 	rpc_ops->read_setup(hdr, msg); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void | static void | ||||||
| @ -351,6 +366,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, | |||||||
| 		struct list_head *pages, unsigned nr_pages) | 		struct list_head *pages, unsigned nr_pages) | ||||||
| { | { | ||||||
| 	struct nfs_pageio_descriptor pgio; | 	struct nfs_pageio_descriptor pgio; | ||||||
|  | 	struct nfs_pgio_mirror *pgm; | ||||||
| 	struct nfs_readdesc desc = { | 	struct nfs_readdesc desc = { | ||||||
| 		.pgio = &pgio, | 		.pgio = &pgio, | ||||||
| 	}; | 	}; | ||||||
| @ -386,10 +402,15 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, | |||||||
| 			     &nfs_async_read_completion_ops); | 			     &nfs_async_read_completion_ops); | ||||||
| 
 | 
 | ||||||
| 	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); | 	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); | ||||||
| 
 |  | ||||||
| 	nfs_pageio_complete(&pgio); | 	nfs_pageio_complete(&pgio); | ||||||
| 	NFS_I(inode)->read_io += pgio.pg_bytes_written; | 
 | ||||||
| 	npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 	/* It doesn't make sense to do mirrored reads! */ | ||||||
|  | 	WARN_ON_ONCE(pgio.pg_mirror_count != 1); | ||||||
|  | 
 | ||||||
|  | 	pgm = &pgio.pg_mirrors[0]; | ||||||
|  | 	NFS_I(inode)->read_io += pgm->pg_bytes_written; | ||||||
|  | 	npages = (pgm->pg_bytes_written + PAGE_CACHE_SIZE - 1) >> | ||||||
|  | 		 PAGE_CACHE_SHIFT; | ||||||
| 	nfs_add_stats(inode, NFSIOS_READPAGES, npages); | 	nfs_add_stats(inode, NFSIOS_READPAGES, npages); | ||||||
| read_complete: | read_complete: | ||||||
| 	put_nfs_open_context(desc.ctx); | 	put_nfs_open_context(desc.ctx); | ||||||
|  | |||||||
| @ -473,13 +473,18 @@ try_again: | |||||||
| 	do { | 	do { | ||||||
| 		/*
 | 		/*
 | ||||||
| 		 * Subrequests are always contiguous, non overlapping | 		 * Subrequests are always contiguous, non overlapping | ||||||
| 		 * and in order. If not, it's a programming error. | 		 * and in order - but may be repeated (mirrored writes). | ||||||
| 		 */ | 		 */ | ||||||
| 		WARN_ON_ONCE(subreq->wb_offset != | 		if (subreq->wb_offset == (head->wb_offset + total_bytes)) { | ||||||
| 		     (head->wb_offset + total_bytes)); |  | ||||||
| 
 |  | ||||||
| 			/* keep track of how many bytes this group covers */ | 			/* keep track of how many bytes this group covers */ | ||||||
| 			total_bytes += subreq->wb_bytes; | 			total_bytes += subreq->wb_bytes; | ||||||
|  | 		} else if (WARN_ON_ONCE(subreq->wb_offset < head->wb_offset || | ||||||
|  | 			    ((subreq->wb_offset + subreq->wb_bytes) > | ||||||
|  | 			     (head->wb_offset + total_bytes)))) { | ||||||
|  | 			nfs_page_group_unlock(head); | ||||||
|  | 			spin_unlock(&inode->i_lock); | ||||||
|  | 			return ERR_PTR(-EIO); | ||||||
|  | 		} | ||||||
| 
 | 
 | ||||||
| 		if (!nfs_lock_request(subreq)) { | 		if (!nfs_lock_request(subreq)) { | ||||||
| 			/* releases page group bit lock and
 | 			/* releases page group bit lock and
 | ||||||
| @ -842,9 +847,9 @@ EXPORT_SYMBOL_GPL(nfs_init_cinfo); | |||||||
|  */ |  */ | ||||||
| void | void | ||||||
| nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, | nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, | ||||||
| 			struct nfs_commit_info *cinfo) | 			struct nfs_commit_info *cinfo, u32 ds_commit_idx) | ||||||
| { | { | ||||||
| 	if (pnfs_mark_request_commit(req, lseg, cinfo)) | 	if (pnfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx)) | ||||||
| 		return; | 		return; | ||||||
| 	nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo); | 	nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo); | ||||||
| } | } | ||||||
| @ -900,7 +905,8 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) | |||||||
| 		} | 		} | ||||||
| 		if (nfs_write_need_commit(hdr)) { | 		if (nfs_write_need_commit(hdr)) { | ||||||
| 			memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); | 			memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); | ||||||
| 			nfs_mark_request_commit(req, hdr->lseg, &cinfo); | 			nfs_mark_request_commit(req, hdr->lseg, &cinfo, | ||||||
|  | 				hdr->pgio_mirror_idx); | ||||||
| 			goto next; | 			goto next; | ||||||
| 		} | 		} | ||||||
| remove_req: | remove_req: | ||||||
| @ -1240,15 +1246,15 @@ static int flush_task_priority(int how) | |||||||
| 
 | 
 | ||||||
| static void nfs_initiate_write(struct nfs_pgio_header *hdr, | static void nfs_initiate_write(struct nfs_pgio_header *hdr, | ||||||
| 			       struct rpc_message *msg, | 			       struct rpc_message *msg, | ||||||
|  | 			       const struct nfs_rpc_ops *rpc_ops, | ||||||
| 			       struct rpc_task_setup *task_setup_data, int how) | 			       struct rpc_task_setup *task_setup_data, int how) | ||||||
| { | { | ||||||
| 	struct inode *inode = hdr->inode; |  | ||||||
| 	int priority = flush_task_priority(how); | 	int priority = flush_task_priority(how); | ||||||
| 
 | 
 | ||||||
| 	task_setup_data->priority = priority; | 	task_setup_data->priority = priority; | ||||||
| 	NFS_PROTO(inode)->write_setup(hdr, msg); | 	rpc_ops->write_setup(hdr, msg); | ||||||
| 
 | 
 | ||||||
| 	nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client, | 	nfs4_state_protect_write(NFS_SERVER(hdr->inode)->nfs_client, | ||||||
| 				 &task_setup_data->rpc_client, msg, hdr); | 				 &task_setup_data->rpc_client, msg, hdr); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -1298,8 +1304,14 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init_write); | |||||||
| 
 | 
 | ||||||
| void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) | void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) | ||||||
| { | { | ||||||
|  | 	struct nfs_pgio_mirror *mirror; | ||||||
|  | 
 | ||||||
| 	pgio->pg_ops = &nfs_pgio_rw_ops; | 	pgio->pg_ops = &nfs_pgio_rw_ops; | ||||||
| 	pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; | 
 | ||||||
|  | 	nfs_pageio_stop_mirroring(pgio); | ||||||
|  | 
 | ||||||
|  | 	mirror = &pgio->pg_mirrors[0]; | ||||||
|  | 	mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; | ||||||
| } | } | ||||||
| EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); | EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); | ||||||
| 
 | 
 | ||||||
| @ -1465,6 +1477,7 @@ void nfs_commitdata_release(struct nfs_commit_data *data) | |||||||
| EXPORT_SYMBOL_GPL(nfs_commitdata_release); | EXPORT_SYMBOL_GPL(nfs_commitdata_release); | ||||||
| 
 | 
 | ||||||
| int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, | int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, | ||||||
|  | 			const struct nfs_rpc_ops *nfs_ops, | ||||||
| 			const struct rpc_call_ops *call_ops, | 			const struct rpc_call_ops *call_ops, | ||||||
| 			int how, int flags) | 			int how, int flags) | ||||||
| { | { | ||||||
| @ -1486,7 +1499,7 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, | |||||||
| 		.priority = priority, | 		.priority = priority, | ||||||
| 	}; | 	}; | ||||||
| 	/* Set up the initial task struct.  */ | 	/* Set up the initial task struct.  */ | ||||||
| 	NFS_PROTO(data->inode)->commit_setup(data, &msg); | 	nfs_ops->commit_setup(data, &msg); | ||||||
| 
 | 
 | ||||||
| 	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); | 	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); | ||||||
| 
 | 
 | ||||||
| @ -1554,14 +1567,15 @@ EXPORT_SYMBOL_GPL(nfs_init_commit); | |||||||
| 
 | 
 | ||||||
| void nfs_retry_commit(struct list_head *page_list, | void nfs_retry_commit(struct list_head *page_list, | ||||||
| 		      struct pnfs_layout_segment *lseg, | 		      struct pnfs_layout_segment *lseg, | ||||||
| 		      struct nfs_commit_info *cinfo) | 		      struct nfs_commit_info *cinfo, | ||||||
|  | 		      u32 ds_commit_idx) | ||||||
| { | { | ||||||
| 	struct nfs_page *req; | 	struct nfs_page *req; | ||||||
| 
 | 
 | ||||||
| 	while (!list_empty(page_list)) { | 	while (!list_empty(page_list)) { | ||||||
| 		req = nfs_list_entry(page_list->next); | 		req = nfs_list_entry(page_list->next); | ||||||
| 		nfs_list_remove_request(req); | 		nfs_list_remove_request(req); | ||||||
| 		nfs_mark_request_commit(req, lseg, cinfo); | 		nfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx); | ||||||
| 		if (!cinfo->dreq) { | 		if (!cinfo->dreq) { | ||||||
| 			dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); | 			dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); | ||||||
| 			dec_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, | 			dec_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, | ||||||
| @ -1589,10 +1603,10 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, | |||||||
| 	/* Set up the argument struct */ | 	/* Set up the argument struct */ | ||||||
| 	nfs_init_commit(data, head, NULL, cinfo); | 	nfs_init_commit(data, head, NULL, cinfo); | ||||||
| 	atomic_inc(&cinfo->mds->rpcs_out); | 	atomic_inc(&cinfo->mds->rpcs_out); | ||||||
| 	return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, | 	return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode), | ||||||
| 				   how, 0); | 				   data->mds_ops, how, 0); | ||||||
|  out_bad: |  out_bad: | ||||||
| 	nfs_retry_commit(head, NULL, cinfo); | 	nfs_retry_commit(head, NULL, cinfo, 0); | ||||||
| 	cinfo->completion_ops->error_cleanup(NFS_I(inode)); | 	cinfo->completion_ops->error_cleanup(NFS_I(inode)); | ||||||
| 	return -ENOMEM; | 	return -ENOMEM; | ||||||
| } | } | ||||||
|  | |||||||
| @ -516,6 +516,7 @@ enum pnfs_layouttype { | |||||||
| 	LAYOUT_NFSV4_1_FILES  = 1, | 	LAYOUT_NFSV4_1_FILES  = 1, | ||||||
| 	LAYOUT_OSD2_OBJECTS = 2, | 	LAYOUT_OSD2_OBJECTS = 2, | ||||||
| 	LAYOUT_BLOCK_VOLUME = 3, | 	LAYOUT_BLOCK_VOLUME = 3, | ||||||
|  | 	LAYOUT_FLEX_FILES = 4, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| /* used for both layout return and recall */ | /* used for both layout return and recall */ | ||||||
|  | |||||||
| @ -77,10 +77,6 @@ struct nfs_client { | |||||||
| 	/* Client owner identifier */ | 	/* Client owner identifier */ | ||||||
| 	const char *		cl_owner_id; | 	const char *		cl_owner_id; | ||||||
| 
 | 
 | ||||||
| 	/* Our own IP address, as a null-terminated string.
 |  | ||||||
| 	 * This is used to generate the mv0 callback address. |  | ||||||
| 	 */ |  | ||||||
| 	char			cl_ipaddr[48]; |  | ||||||
| 	u32			cl_cb_ident;	/* v4.0 callback identifier */ | 	u32			cl_cb_ident;	/* v4.0 callback identifier */ | ||||||
| 	const struct nfs4_minor_version_ops *cl_mvops; | 	const struct nfs4_minor_version_ops *cl_mvops; | ||||||
| 	unsigned long		cl_mig_gen; | 	unsigned long		cl_mig_gen; | ||||||
| @ -108,6 +104,11 @@ struct nfs_client { | |||||||
| #define NFS_SP4_MACH_CRED_COMMIT   6	/* COMMIT */ | #define NFS_SP4_MACH_CRED_COMMIT   6	/* COMMIT */ | ||||||
| #endif /* CONFIG_NFS_V4 */ | #endif /* CONFIG_NFS_V4 */ | ||||||
| 
 | 
 | ||||||
|  | 	/* Our own IP address, as a null-terminated string.
 | ||||||
|  | 	 * This is used to generate the mv0 callback address. | ||||||
|  | 	 */ | ||||||
|  | 	char			cl_ipaddr[48]; | ||||||
|  | 
 | ||||||
| #ifdef CONFIG_NFS_FSCACHE | #ifdef CONFIG_NFS_FSCACHE | ||||||
| 	struct fscache_cookie	*fscache;	/* client index cache cookie */ | 	struct fscache_cookie	*fscache;	/* client index cache cookie */ | ||||||
| #endif | #endif | ||||||
|  | |||||||
| @ -73,5 +73,7 @@ int nfs_map_group_to_gid(const struct nfs_server *, const char *, size_t, kgid_t | |||||||
| int nfs_map_uid_to_name(const struct nfs_server *, kuid_t, char *, size_t); | int nfs_map_uid_to_name(const struct nfs_server *, kuid_t, char *, size_t); | ||||||
| int nfs_map_gid_to_group(const struct nfs_server *, kgid_t, char *, size_t); | int nfs_map_gid_to_group(const struct nfs_server *, kgid_t, char *, size_t); | ||||||
| 
 | 
 | ||||||
|  | int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res); | ||||||
|  | 
 | ||||||
| extern unsigned int nfs_idmap_cache_timeout; | extern unsigned int nfs_idmap_cache_timeout; | ||||||
| #endif /* NFS_IDMAP_H */ | #endif /* NFS_IDMAP_H */ | ||||||
|  | |||||||
| @ -58,6 +58,9 @@ struct nfs_pageio_ops { | |||||||
| 	size_t	(*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, | 	size_t	(*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, | ||||||
| 			   struct nfs_page *); | 			   struct nfs_page *); | ||||||
| 	int	(*pg_doio)(struct nfs_pageio_descriptor *); | 	int	(*pg_doio)(struct nfs_pageio_descriptor *); | ||||||
|  | 	unsigned int	(*pg_get_mirror_count)(struct nfs_pageio_descriptor *, | ||||||
|  | 				       struct nfs_page *); | ||||||
|  | 	void	(*pg_cleanup)(struct nfs_pageio_descriptor *); | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct nfs_rw_ops { | struct nfs_rw_ops { | ||||||
| @ -69,18 +72,21 @@ struct nfs_rw_ops { | |||||||
| 			struct inode *); | 			struct inode *); | ||||||
| 	void (*rw_result)(struct rpc_task *, struct nfs_pgio_header *); | 	void (*rw_result)(struct rpc_task *, struct nfs_pgio_header *); | ||||||
| 	void (*rw_initiate)(struct nfs_pgio_header *, struct rpc_message *, | 	void (*rw_initiate)(struct nfs_pgio_header *, struct rpc_message *, | ||||||
|  | 			    const struct nfs_rpc_ops *, | ||||||
| 			    struct rpc_task_setup *, int); | 			    struct rpc_task_setup *, int); | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct nfs_pageio_descriptor { | struct nfs_pgio_mirror { | ||||||
| 	struct list_head	pg_list; | 	struct list_head	pg_list; | ||||||
| 	unsigned long		pg_bytes_written; | 	unsigned long		pg_bytes_written; | ||||||
| 	size_t			pg_count; | 	size_t			pg_count; | ||||||
| 	size_t			pg_bsize; | 	size_t			pg_bsize; | ||||||
| 	unsigned int		pg_base; | 	unsigned int		pg_base; | ||||||
| 	unsigned char		pg_moreio : 1, | 	unsigned char		pg_recoalesce : 1; | ||||||
| 				pg_recoalesce : 1; | }; | ||||||
| 
 | 
 | ||||||
|  | struct nfs_pageio_descriptor { | ||||||
|  | 	unsigned char		pg_moreio : 1; | ||||||
| 	struct inode		*pg_inode; | 	struct inode		*pg_inode; | ||||||
| 	const struct nfs_pageio_ops *pg_ops; | 	const struct nfs_pageio_ops *pg_ops; | ||||||
| 	const struct nfs_rw_ops *pg_rw_ops; | 	const struct nfs_rw_ops *pg_rw_ops; | ||||||
| @ -91,8 +97,18 @@ struct nfs_pageio_descriptor { | |||||||
| 	struct pnfs_layout_segment *pg_lseg; | 	struct pnfs_layout_segment *pg_lseg; | ||||||
| 	struct nfs_direct_req	*pg_dreq; | 	struct nfs_direct_req	*pg_dreq; | ||||||
| 	void			*pg_layout_private; | 	void			*pg_layout_private; | ||||||
|  | 	unsigned int		pg_bsize;	/* default bsize for mirrors */ | ||||||
|  | 
 | ||||||
|  | 	u32			pg_mirror_count; | ||||||
|  | 	struct nfs_pgio_mirror	*pg_mirrors; | ||||||
|  | 	struct nfs_pgio_mirror	pg_mirrors_static[1]; | ||||||
|  | 	struct nfs_pgio_mirror	*pg_mirrors_dynamic; | ||||||
|  | 	u32			pg_mirror_idx;	/* current mirror */ | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | /* arbitrarily selected limit to number of mirrors */ | ||||||
|  | #define NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX 16 | ||||||
|  | 
 | ||||||
| #define NFS_WBACK_BUSY(req)	(test_bit(PG_BUSY,&(req)->wb_flags)) | #define NFS_WBACK_BUSY(req)	(test_bit(PG_BUSY,&(req)->wb_flags)) | ||||||
| 
 | 
 | ||||||
| extern	struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, | extern	struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, | ||||||
|  | |||||||
| @ -293,6 +293,7 @@ struct nfs4_layoutreturn_args { | |||||||
| 	struct nfs4_sequence_args seq_args; | 	struct nfs4_sequence_args seq_args; | ||||||
| 	struct pnfs_layout_hdr *layout; | 	struct pnfs_layout_hdr *layout; | ||||||
| 	struct inode *inode; | 	struct inode *inode; | ||||||
|  | 	struct pnfs_layout_range range; | ||||||
| 	nfs4_stateid stateid; | 	nfs4_stateid stateid; | ||||||
| 	__u32   layout_type; | 	__u32   layout_type; | ||||||
| }; | }; | ||||||
| @ -514,6 +515,7 @@ struct nfs_pgio_res { | |||||||
| 	struct nfs4_sequence_res	seq_res; | 	struct nfs4_sequence_res	seq_res; | ||||||
| 	struct nfs_fattr *	fattr; | 	struct nfs_fattr *	fattr; | ||||||
| 	__u32			count; | 	__u32			count; | ||||||
|  | 	__u32			op_status; | ||||||
| 	int			eof;		/* used by read */ | 	int			eof;		/* used by read */ | ||||||
| 	struct nfs_writeverf *	verf;		/* used by write */ | 	struct nfs_writeverf *	verf;		/* used by write */ | ||||||
| 	const struct nfs_server *server;	/* used by write */ | 	const struct nfs_server *server;	/* used by write */ | ||||||
| @ -533,6 +535,7 @@ struct nfs_commitargs { | |||||||
| 
 | 
 | ||||||
| struct nfs_commitres { | struct nfs_commitres { | ||||||
| 	struct nfs4_sequence_res	seq_res; | 	struct nfs4_sequence_res	seq_res; | ||||||
|  | 	__u32			op_status; | ||||||
| 	struct nfs_fattr	*fattr; | 	struct nfs_fattr	*fattr; | ||||||
| 	struct nfs_writeverf	*verf; | 	struct nfs_writeverf	*verf; | ||||||
| 	const struct nfs_server *server; | 	const struct nfs_server *server; | ||||||
| @ -1326,7 +1329,8 @@ struct nfs_pgio_header { | |||||||
| 	__u64			mds_offset;	/* Filelayout dense stripe */ | 	__u64			mds_offset;	/* Filelayout dense stripe */ | ||||||
| 	struct nfs_page_array	page_array; | 	struct nfs_page_array	page_array; | ||||||
| 	struct nfs_client	*ds_clp;	/* pNFS data server */ | 	struct nfs_client	*ds_clp;	/* pNFS data server */ | ||||||
| 	int			ds_idx;		/* ds index if ds_clp is set */ | 	int			ds_commit_idx;	/* ds index if ds_clp is set */ | ||||||
|  | 	int			pgio_mirror_idx;/* mirror index in pgio layer */ | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct nfs_mds_commit_info { | struct nfs_mds_commit_info { | ||||||
|  | |||||||
| @ -79,6 +79,8 @@ struct rpc_clnt; | |||||||
| struct rpc_iostats *	rpc_alloc_iostats(struct rpc_clnt *); | struct rpc_iostats *	rpc_alloc_iostats(struct rpc_clnt *); | ||||||
| void			rpc_count_iostats(const struct rpc_task *, | void			rpc_count_iostats(const struct rpc_task *, | ||||||
| 					  struct rpc_iostats *); | 					  struct rpc_iostats *); | ||||||
|  | void			rpc_count_iostats_metrics(const struct rpc_task *, | ||||||
|  | 					  struct rpc_iostats *); | ||||||
| void			rpc_print_iostats(struct seq_file *, struct rpc_clnt *); | void			rpc_print_iostats(struct seq_file *, struct rpc_clnt *); | ||||||
| void			rpc_free_iostats(struct rpc_iostats *); | void			rpc_free_iostats(struct rpc_iostats *); | ||||||
| 
 | 
 | ||||||
| @ -87,6 +89,8 @@ void			rpc_free_iostats(struct rpc_iostats *); | |||||||
| static inline struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt) { return NULL; } | static inline struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt) { return NULL; } | ||||||
| static inline void rpc_count_iostats(const struct rpc_task *task, | static inline void rpc_count_iostats(const struct rpc_task *task, | ||||||
| 				     struct rpc_iostats *stats) {} | 				     struct rpc_iostats *stats) {} | ||||||
|  | static inline void rpc_count_iostats_metrics(const struct rpc_task *, | ||||||
|  | 					     struct rpc_iostats *) {} | ||||||
| static inline void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) {} | static inline void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) {} | ||||||
| static inline void rpc_free_iostats(struct rpc_iostats *stats) {} | static inline void rpc_free_iostats(struct rpc_iostats *stats) {} | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -140,22 +140,20 @@ void rpc_free_iostats(struct rpc_iostats *stats) | |||||||
| EXPORT_SYMBOL_GPL(rpc_free_iostats); | EXPORT_SYMBOL_GPL(rpc_free_iostats); | ||||||
| 
 | 
 | ||||||
| /**
 | /**
 | ||||||
|  * rpc_count_iostats - tally up per-task stats |  * rpc_count_iostats_metrics - tally up per-task stats | ||||||
|  * @task: completed rpc_task |  * @task: completed rpc_task | ||||||
|  * @stats: array of stat structures |  * @op_metrics: stat structure for OP that will accumulate stats from @task | ||||||
|  */ |  */ | ||||||
| void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats) | void rpc_count_iostats_metrics(const struct rpc_task *task, | ||||||
|  | 			       struct rpc_iostats *op_metrics) | ||||||
| { | { | ||||||
| 	struct rpc_rqst *req = task->tk_rqstp; | 	struct rpc_rqst *req = task->tk_rqstp; | ||||||
| 	struct rpc_iostats *op_metrics; |  | ||||||
| 	ktime_t delta, now; | 	ktime_t delta, now; | ||||||
| 
 | 
 | ||||||
| 	if (!stats || !req) | 	if (!op_metrics || !req) | ||||||
| 		return; | 		return; | ||||||
| 
 | 
 | ||||||
| 	now = ktime_get(); | 	now = ktime_get(); | ||||||
| 	op_metrics = &stats[task->tk_msg.rpc_proc->p_statidx]; |  | ||||||
| 
 |  | ||||||
| 	spin_lock(&op_metrics->om_lock); | 	spin_lock(&op_metrics->om_lock); | ||||||
| 
 | 
 | ||||||
| 	op_metrics->om_ops++; | 	op_metrics->om_ops++; | ||||||
| @ -175,6 +173,20 @@ void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats) | |||||||
| 
 | 
 | ||||||
| 	spin_unlock(&op_metrics->om_lock); | 	spin_unlock(&op_metrics->om_lock); | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL_GPL(rpc_count_iostats_metrics); | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * rpc_count_iostats - tally up per-task stats | ||||||
|  |  * @task: completed rpc_task | ||||||
|  |  * @stats: array of stat structures | ||||||
|  |  * | ||||||
|  |  * Uses the statidx from @task | ||||||
|  |  */ | ||||||
|  | void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats) | ||||||
|  | { | ||||||
|  | 	rpc_count_iostats_metrics(task, | ||||||
|  | 				  &stats[task->tk_msg.rpc_proc->p_statidx]); | ||||||
|  | } | ||||||
| EXPORT_SYMBOL_GPL(rpc_count_iostats); | EXPORT_SYMBOL_GPL(rpc_count_iostats); | ||||||
| 
 | 
 | ||||||
| static void _print_name(struct seq_file *seq, unsigned int op, | static void _print_name(struct seq_file *seq, unsigned int op, | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user