Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (22 commits) ceph: do not carry i_lock for readdir from dcache fs/ceph/xattr.c: Use kmemdup rbd: passing wrong variable to bvec_kunmap_irq() rbd: null vs ERR_PTR ceph: fix num_pages_free accounting in pagelist ceph: add CEPH_MDS_OP_SETDIRLAYOUT and associated ioctl. ceph: don't crash when passed bad mount options ceph: fix debugfs warnings block: rbd: removing unnecessary test block: rbd: fixed may leaks ceph: switch from BKL to lock_flocks() ceph: preallocate flock state without locks held ceph: add pagelist_reserve, pagelist_truncate, pagelist_set_cursor ceph: use mapping->nrpages to determine if mapping is empty ceph: only invalidate on check_caps if we actually have pages ceph: do not hide .snap in root directory rbd: introduce rados block device (rbd), based on libceph ceph: factor out libceph from Ceph file system ceph-rbd: osdc support for osd call and rollback operations ceph: messenger and osdc changes for rbd ...
This commit is contained in:
		
						commit
						2017bd1945
					
				
							
								
								
									
										11
									
								
								MAINTAINERS
									
									
									
									
									
								
							
							
						
						
									
										11
									
								
								MAINTAINERS
									
									
									
									
									
								
							| @ -1527,6 +1527,8 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git | |||||||
| S:	Supported | S:	Supported | ||||||
| F:	Documentation/filesystems/ceph.txt | F:	Documentation/filesystems/ceph.txt | ||||||
| F:	fs/ceph | F:	fs/ceph | ||||||
|  | F:	net/ceph | ||||||
|  | F:	include/linux/ceph | ||||||
| 
 | 
 | ||||||
| CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM: | CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM: | ||||||
| M:	David Vrabel <david.vrabel@csr.com> | M:	David Vrabel <david.vrabel@csr.com> | ||||||
| @ -4805,6 +4807,15 @@ F:	fs/qnx4/ | |||||||
| F:	include/linux/qnx4_fs.h | F:	include/linux/qnx4_fs.h | ||||||
| F:	include/linux/qnxtypes.h | F:	include/linux/qnxtypes.h | ||||||
| 
 | 
 | ||||||
|  | RADOS BLOCK DEVICE (RBD) | ||||||
|  | F:	include/linux/qnxtypes.h | ||||||
|  | M:	Yehuda Sadeh <yehuda@hq.newdream.net> | ||||||
|  | M:	Sage Weil <sage@newdream.net> | ||||||
|  | M:	ceph-devel@vger.kernel.org | ||||||
|  | S:	Supported | ||||||
|  | F:	drivers/block/rbd.c | ||||||
|  | F:	drivers/block/rbd_types.h | ||||||
|  | 
 | ||||||
| RADEON FRAMEBUFFER DISPLAY DRIVER | RADEON FRAMEBUFFER DISPLAY DRIVER | ||||||
| M:	Benjamin Herrenschmidt <benh@kernel.crashing.org> | M:	Benjamin Herrenschmidt <benh@kernel.crashing.org> | ||||||
| L:	linux-fbdev@vger.kernel.org | L:	linux-fbdev@vger.kernel.org | ||||||
|  | |||||||
| @ -488,4 +488,21 @@ config BLK_DEV_HD | |||||||
| 
 | 
 | ||||||
| 	  If unsure, say N. | 	  If unsure, say N. | ||||||
| 
 | 
 | ||||||
|  | config BLK_DEV_RBD | ||||||
|  | 	tristate "Rados block device (RBD)" | ||||||
|  | 	depends on INET && EXPERIMENTAL && BLOCK | ||||||
|  | 	select CEPH_LIB | ||||||
|  | 	select LIBCRC32C | ||||||
|  | 	select CRYPTO_AES | ||||||
|  | 	select CRYPTO | ||||||
|  | 	default n | ||||||
|  | 	help | ||||||
|  | 	  Say Y here if you want to include the Rados block device, which stripes | ||||||
|  | 	  a block device over objects stored in the Ceph distributed object | ||||||
|  | 	  store. | ||||||
|  | 
 | ||||||
|  | 	  More information at http://ceph.newdream.net/. | ||||||
|  | 
 | ||||||
|  | 	  If unsure, say N. | ||||||
|  | 
 | ||||||
| endif # BLK_DEV | endif # BLK_DEV | ||||||
|  | |||||||
| @ -37,5 +37,6 @@ obj-$(CONFIG_BLK_DEV_HD)	+= hd.o | |||||||
| 
 | 
 | ||||||
| obj-$(CONFIG_XEN_BLKDEV_FRONTEND)	+= xen-blkfront.o | obj-$(CONFIG_XEN_BLKDEV_FRONTEND)	+= xen-blkfront.o | ||||||
| obj-$(CONFIG_BLK_DEV_DRBD)     += drbd/ | obj-$(CONFIG_BLK_DEV_DRBD)     += drbd/ | ||||||
|  | obj-$(CONFIG_BLK_DEV_RBD)     += rbd.o | ||||||
| 
 | 
 | ||||||
| swim_mod-objs	:= swim.o swim_asm.o | swim_mod-objs	:= swim.o swim_asm.o | ||||||
|  | |||||||
							
								
								
									
										1841
									
								
								drivers/block/rbd.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1841
									
								
								drivers/block/rbd.c
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										73
									
								
								drivers/block/rbd_types.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										73
									
								
								drivers/block/rbd_types.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,73 @@ | |||||||
|  | /*
 | ||||||
|  |  * Ceph - scalable distributed file system | ||||||
|  |  * | ||||||
|  |  * Copyright (C) 2004-2010 Sage Weil <sage@newdream.net> | ||||||
|  |  * | ||||||
|  |  * This is free software; you can redistribute it and/or | ||||||
|  |  * modify it under the terms of the GNU Lesser General Public | ||||||
|  |  * License version 2.1, as published by the Free Software | ||||||
|  |  * Foundation.  See file COPYING. | ||||||
|  |  * | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | #ifndef CEPH_RBD_TYPES_H | ||||||
|  | #define CEPH_RBD_TYPES_H | ||||||
|  | 
 | ||||||
|  | #include <linux/types.h> | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * rbd image 'foo' consists of objects | ||||||
|  |  *   foo.rbd      - image metadata | ||||||
|  |  *   foo.00000000 | ||||||
|  |  *   foo.00000001 | ||||||
|  |  *   ...          - data | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | #define RBD_SUFFIX		".rbd" | ||||||
|  | #define RBD_DIRECTORY           "rbd_directory" | ||||||
|  | #define RBD_INFO                "rbd_info" | ||||||
|  | 
 | ||||||
|  | #define RBD_DEFAULT_OBJ_ORDER	22   /* 4MB */ | ||||||
|  | #define RBD_MIN_OBJ_ORDER       16 | ||||||
|  | #define RBD_MAX_OBJ_ORDER       30 | ||||||
|  | 
 | ||||||
|  | #define RBD_MAX_OBJ_NAME_LEN	96 | ||||||
|  | #define RBD_MAX_SEG_NAME_LEN	128 | ||||||
|  | 
 | ||||||
|  | #define RBD_COMP_NONE		0 | ||||||
|  | #define RBD_CRYPT_NONE		0 | ||||||
|  | 
 | ||||||
|  | #define RBD_HEADER_TEXT		"<<< Rados Block Device Image >>>\n" | ||||||
|  | #define RBD_HEADER_SIGNATURE	"RBD" | ||||||
|  | #define RBD_HEADER_VERSION	"001.005" | ||||||
|  | 
 | ||||||
|  | struct rbd_info { | ||||||
|  | 	__le64 max_id; | ||||||
|  | } __attribute__ ((packed)); | ||||||
|  | 
 | ||||||
|  | struct rbd_image_snap_ondisk { | ||||||
|  | 	__le64 id; | ||||||
|  | 	__le64 image_size; | ||||||
|  | } __attribute__((packed)); | ||||||
|  | 
 | ||||||
|  | struct rbd_image_header_ondisk { | ||||||
|  | 	char text[40]; | ||||||
|  | 	char block_name[24]; | ||||||
|  | 	char signature[4]; | ||||||
|  | 	char version[8]; | ||||||
|  | 	struct { | ||||||
|  | 		__u8 order; | ||||||
|  | 		__u8 crypt_type; | ||||||
|  | 		__u8 comp_type; | ||||||
|  | 		__u8 unused; | ||||||
|  | 	} __attribute__((packed)) options; | ||||||
|  | 	__le64 image_size; | ||||||
|  | 	__le64 snap_seq; | ||||||
|  | 	__le32 snap_count; | ||||||
|  | 	__le32 reserved; | ||||||
|  | 	__le64 snap_names_len; | ||||||
|  | 	struct rbd_image_snap_ondisk snaps[0]; | ||||||
|  | } __attribute__((packed)); | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #endif | ||||||
| @ -1,9 +1,11 @@ | |||||||
| config CEPH_FS | config CEPH_FS | ||||||
|         tristate "Ceph distributed file system (EXPERIMENTAL)" |         tristate "Ceph distributed file system (EXPERIMENTAL)" | ||||||
| 	depends on INET && EXPERIMENTAL | 	depends on INET && EXPERIMENTAL | ||||||
|  | 	select CEPH_LIB | ||||||
| 	select LIBCRC32C | 	select LIBCRC32C | ||||||
| 	select CRYPTO_AES | 	select CRYPTO_AES | ||||||
| 	select CRYPTO | 	select CRYPTO | ||||||
|  | 	default n | ||||||
| 	help | 	help | ||||||
| 	  Choose Y or M here to include support for mounting the | 	  Choose Y or M here to include support for mounting the | ||||||
| 	  experimental Ceph distributed file system.  Ceph is an extremely | 	  experimental Ceph distributed file system.  Ceph is an extremely | ||||||
| @ -14,15 +16,3 @@ config CEPH_FS | |||||||
| 
 | 
 | ||||||
| 	  If unsure, say N. | 	  If unsure, say N. | ||||||
| 
 | 
 | ||||||
| config CEPH_FS_PRETTYDEBUG |  | ||||||
| 	bool "Include file:line in ceph debug output" |  | ||||||
| 	depends on CEPH_FS |  | ||||||
| 	default n |  | ||||||
| 	help |  | ||||||
| 	  If you say Y here, debug output will include a filename and |  | ||||||
| 	  line to aid debugging.  This increases kernel size and slows |  | ||||||
| 	  execution slightly when debug call sites are enabled (e.g., |  | ||||||
| 	  via CONFIG_DYNAMIC_DEBUG). |  | ||||||
| 
 |  | ||||||
| 	  If unsure, say N. |  | ||||||
| 
 |  | ||||||
|  | |||||||
| @ -8,15 +8,8 @@ obj-$(CONFIG_CEPH_FS) += ceph.o | |||||||
| 
 | 
 | ||||||
| ceph-objs := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
 | ceph-objs := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
 | ||||||
| 	export.o caps.o snap.o xattr.o \
 | 	export.o caps.o snap.o xattr.o \
 | ||||||
| 	messenger.o msgpool.o buffer.o pagelist.o \
 | 	mds_client.o mdsmap.o strings.o ceph_frag.o \
 | ||||||
| 	mds_client.o mdsmap.o \
 | 	debugfs.o | ||||||
| 	mon_client.o \
 |  | ||||||
| 	osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
 |  | ||||||
| 	debugfs.o \
 |  | ||||||
| 	auth.o auth_none.o \
 |  | ||||||
| 	crypto.o armor.o \
 |  | ||||||
| 	auth_x.o \
 |  | ||||||
| 	ceph_fs.o ceph_strings.o ceph_hash.o ceph_frag.o |  | ||||||
| 
 | 
 | ||||||
| else | else | ||||||
| #Otherwise we were called directly from the command
 | #Otherwise we were called directly from the command
 | ||||||
|  | |||||||
| @ -1,20 +0,0 @@ | |||||||
| # |  | ||||||
| # The following files are shared by (and manually synchronized |  | ||||||
| # between) the Ceph userland and kernel client. |  | ||||||
| # |  | ||||||
| # userland                  kernel |  | ||||||
| src/include/ceph_fs.h	    fs/ceph/ceph_fs.h |  | ||||||
| src/include/ceph_fs.cc	    fs/ceph/ceph_fs.c |  | ||||||
| src/include/msgr.h	    fs/ceph/msgr.h |  | ||||||
| src/include/rados.h	    fs/ceph/rados.h |  | ||||||
| src/include/ceph_strings.cc fs/ceph/ceph_strings.c |  | ||||||
| src/include/ceph_frag.h	    fs/ceph/ceph_frag.h |  | ||||||
| src/include/ceph_frag.cc    fs/ceph/ceph_frag.c |  | ||||||
| src/include/ceph_hash.h	    fs/ceph/ceph_hash.h |  | ||||||
| src/include/ceph_hash.cc    fs/ceph/ceph_hash.c |  | ||||||
| src/crush/crush.c	    fs/ceph/crush/crush.c |  | ||||||
| src/crush/crush.h	    fs/ceph/crush/crush.h |  | ||||||
| src/crush/mapper.c	    fs/ceph/crush/mapper.c |  | ||||||
| src/crush/mapper.h	    fs/ceph/crush/mapper.h |  | ||||||
| src/crush/hash.h	    fs/ceph/crush/hash.h |  | ||||||
| src/crush/hash.c	    fs/ceph/crush/hash.c |  | ||||||
| @ -1,4 +1,4 @@ | |||||||
| #include "ceph_debug.h" | #include <linux/ceph/ceph_debug.h> | ||||||
| 
 | 
 | ||||||
| #include <linux/backing-dev.h> | #include <linux/backing-dev.h> | ||||||
| #include <linux/fs.h> | #include <linux/fs.h> | ||||||
| @ -10,7 +10,8 @@ | |||||||
| #include <linux/task_io_accounting_ops.h> | #include <linux/task_io_accounting_ops.h> | ||||||
| 
 | 
 | ||||||
| #include "super.h" | #include "super.h" | ||||||
| #include "osd_client.h" | #include "mds_client.h" | ||||||
|  | #include <linux/ceph/osd_client.h> | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Ceph address space ops. |  * Ceph address space ops. | ||||||
| @ -193,7 +194,8 @@ static int readpage_nounlock(struct file *filp, struct page *page) | |||||||
| { | { | ||||||
| 	struct inode *inode = filp->f_dentry->d_inode; | 	struct inode *inode = filp->f_dentry->d_inode; | ||||||
| 	struct ceph_inode_info *ci = ceph_inode(inode); | 	struct ceph_inode_info *ci = ceph_inode(inode); | ||||||
| 	struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc; | 	struct ceph_osd_client *osdc =  | ||||||
|  | 		&ceph_inode_to_client(inode)->client->osdc; | ||||||
| 	int err = 0; | 	int err = 0; | ||||||
| 	u64 len = PAGE_CACHE_SIZE; | 	u64 len = PAGE_CACHE_SIZE; | ||||||
| 
 | 
 | ||||||
| @ -265,7 +267,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||||||
| { | { | ||||||
| 	struct inode *inode = file->f_dentry->d_inode; | 	struct inode *inode = file->f_dentry->d_inode; | ||||||
| 	struct ceph_inode_info *ci = ceph_inode(inode); | 	struct ceph_inode_info *ci = ceph_inode(inode); | ||||||
| 	struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc; | 	struct ceph_osd_client *osdc = | ||||||
|  | 		&ceph_inode_to_client(inode)->client->osdc; | ||||||
| 	int rc = 0; | 	int rc = 0; | ||||||
| 	struct page **pages; | 	struct page **pages; | ||||||
| 	loff_t offset; | 	loff_t offset; | ||||||
| @ -365,7 +368,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||||||
| { | { | ||||||
| 	struct inode *inode; | 	struct inode *inode; | ||||||
| 	struct ceph_inode_info *ci; | 	struct ceph_inode_info *ci; | ||||||
| 	struct ceph_client *client; | 	struct ceph_fs_client *fsc; | ||||||
| 	struct ceph_osd_client *osdc; | 	struct ceph_osd_client *osdc; | ||||||
| 	loff_t page_off = page->index << PAGE_CACHE_SHIFT; | 	loff_t page_off = page->index << PAGE_CACHE_SHIFT; | ||||||
| 	int len = PAGE_CACHE_SIZE; | 	int len = PAGE_CACHE_SIZE; | ||||||
| @ -383,8 +386,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||||||
| 	} | 	} | ||||||
| 	inode = page->mapping->host; | 	inode = page->mapping->host; | ||||||
| 	ci = ceph_inode(inode); | 	ci = ceph_inode(inode); | ||||||
| 	client = ceph_inode_to_client(inode); | 	fsc = ceph_inode_to_client(inode); | ||||||
| 	osdc = &client->osdc; | 	osdc = &fsc->client->osdc; | ||||||
| 
 | 
 | ||||||
| 	/* verify this is a writeable snap context */ | 	/* verify this is a writeable snap context */ | ||||||
| 	snapc = (void *)page->private; | 	snapc = (void *)page->private; | ||||||
| @ -414,10 +417,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||||||
| 	dout("writepage %p page %p index %lu on %llu~%u snapc %p\n", | 	dout("writepage %p page %p index %lu on %llu~%u snapc %p\n", | ||||||
| 	     inode, page, page->index, page_off, len, snapc); | 	     inode, page, page->index, page_off, len, snapc); | ||||||
| 
 | 
 | ||||||
| 	writeback_stat = atomic_long_inc_return(&client->writeback_count); | 	writeback_stat = atomic_long_inc_return(&fsc->writeback_count); | ||||||
| 	if (writeback_stat > | 	if (writeback_stat > | ||||||
| 	    CONGESTION_ON_THRESH(client->mount_args->congestion_kb)) | 	    CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb)) | ||||||
| 		set_bdi_congested(&client->backing_dev_info, BLK_RW_ASYNC); | 		set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC); | ||||||
| 
 | 
 | ||||||
| 	set_page_writeback(page); | 	set_page_writeback(page); | ||||||
| 	err = ceph_osdc_writepages(osdc, ceph_vino(inode), | 	err = ceph_osdc_writepages(osdc, ceph_vino(inode), | ||||||
| @ -496,7 +499,7 @@ static void writepages_finish(struct ceph_osd_request *req, | |||||||
| 	struct address_space *mapping = inode->i_mapping; | 	struct address_space *mapping = inode->i_mapping; | ||||||
| 	__s32 rc = -EIO; | 	__s32 rc = -EIO; | ||||||
| 	u64 bytes = 0; | 	u64 bytes = 0; | ||||||
| 	struct ceph_client *client = ceph_inode_to_client(inode); | 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode); | ||||||
| 	long writeback_stat; | 	long writeback_stat; | ||||||
| 	unsigned issued = ceph_caps_issued(ci); | 	unsigned issued = ceph_caps_issued(ci); | ||||||
| 
 | 
 | ||||||
| @ -529,10 +532,10 @@ static void writepages_finish(struct ceph_osd_request *req, | |||||||
| 		WARN_ON(!PageUptodate(page)); | 		WARN_ON(!PageUptodate(page)); | ||||||
| 
 | 
 | ||||||
| 		writeback_stat = | 		writeback_stat = | ||||||
| 			atomic_long_dec_return(&client->writeback_count); | 			atomic_long_dec_return(&fsc->writeback_count); | ||||||
| 		if (writeback_stat < | 		if (writeback_stat < | ||||||
| 		    CONGESTION_OFF_THRESH(client->mount_args->congestion_kb)) | 		    CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb)) | ||||||
| 			clear_bdi_congested(&client->backing_dev_info, | 			clear_bdi_congested(&fsc->backing_dev_info, | ||||||
| 					    BLK_RW_ASYNC); | 					    BLK_RW_ASYNC); | ||||||
| 
 | 
 | ||||||
| 		ceph_put_snap_context((void *)page->private); | 		ceph_put_snap_context((void *)page->private); | ||||||
| @ -569,13 +572,13 @@ static void writepages_finish(struct ceph_osd_request *req, | |||||||
|  * mempool.  we avoid the mempool if we can because req->r_num_pages |  * mempool.  we avoid the mempool if we can because req->r_num_pages | ||||||
|  * may be less than the maximum write size. |  * may be less than the maximum write size. | ||||||
|  */ |  */ | ||||||
| static void alloc_page_vec(struct ceph_client *client, | static void alloc_page_vec(struct ceph_fs_client *fsc, | ||||||
| 			   struct ceph_osd_request *req) | 			   struct ceph_osd_request *req) | ||||||
| { | { | ||||||
| 	req->r_pages = kmalloc(sizeof(struct page *) * req->r_num_pages, | 	req->r_pages = kmalloc(sizeof(struct page *) * req->r_num_pages, | ||||||
| 			       GFP_NOFS); | 			       GFP_NOFS); | ||||||
| 	if (!req->r_pages) { | 	if (!req->r_pages) { | ||||||
| 		req->r_pages = mempool_alloc(client->wb_pagevec_pool, GFP_NOFS); | 		req->r_pages = mempool_alloc(fsc->wb_pagevec_pool, GFP_NOFS); | ||||||
| 		req->r_pages_from_pool = 1; | 		req->r_pages_from_pool = 1; | ||||||
| 		WARN_ON(!req->r_pages); | 		WARN_ON(!req->r_pages); | ||||||
| 	} | 	} | ||||||
| @ -590,7 +593,7 @@ static int ceph_writepages_start(struct address_space *mapping, | |||||||
| 	struct inode *inode = mapping->host; | 	struct inode *inode = mapping->host; | ||||||
| 	struct backing_dev_info *bdi = mapping->backing_dev_info; | 	struct backing_dev_info *bdi = mapping->backing_dev_info; | ||||||
| 	struct ceph_inode_info *ci = ceph_inode(inode); | 	struct ceph_inode_info *ci = ceph_inode(inode); | ||||||
| 	struct ceph_client *client; | 	struct ceph_fs_client *fsc; | ||||||
| 	pgoff_t index, start, end; | 	pgoff_t index, start, end; | ||||||
| 	int range_whole = 0; | 	int range_whole = 0; | ||||||
| 	int should_loop = 1; | 	int should_loop = 1; | ||||||
| @ -617,13 +620,13 @@ static int ceph_writepages_start(struct address_space *mapping, | |||||||
| 	     wbc->sync_mode == WB_SYNC_NONE ? "NONE" : | 	     wbc->sync_mode == WB_SYNC_NONE ? "NONE" : | ||||||
| 	     (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); | 	     (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); | ||||||
| 
 | 
 | ||||||
| 	client = ceph_inode_to_client(inode); | 	fsc = ceph_inode_to_client(inode); | ||||||
| 	if (client->mount_state == CEPH_MOUNT_SHUTDOWN) { | 	if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { | ||||||
| 		pr_warning("writepage_start %p on forced umount\n", inode); | 		pr_warning("writepage_start %p on forced umount\n", inode); | ||||||
| 		return -EIO; /* we're in a forced umount, don't write! */ | 		return -EIO; /* we're in a forced umount, don't write! */ | ||||||
| 	} | 	} | ||||||
| 	if (client->mount_args->wsize && client->mount_args->wsize < wsize) | 	if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize) | ||||||
| 		wsize = client->mount_args->wsize; | 		wsize = fsc->mount_options->wsize; | ||||||
| 	if (wsize < PAGE_CACHE_SIZE) | 	if (wsize < PAGE_CACHE_SIZE) | ||||||
| 		wsize = PAGE_CACHE_SIZE; | 		wsize = PAGE_CACHE_SIZE; | ||||||
| 	max_pages_ever = wsize >> PAGE_CACHE_SHIFT; | 	max_pages_ever = wsize >> PAGE_CACHE_SHIFT; | ||||||
| @ -769,7 +772,7 @@ get_more_pages: | |||||||
| 				offset = (unsigned long long)page->index | 				offset = (unsigned long long)page->index | ||||||
| 					<< PAGE_CACHE_SHIFT; | 					<< PAGE_CACHE_SHIFT; | ||||||
| 				len = wsize; | 				len = wsize; | ||||||
| 				req = ceph_osdc_new_request(&client->osdc, | 				req = ceph_osdc_new_request(&fsc->client->osdc, | ||||||
| 					    &ci->i_layout, | 					    &ci->i_layout, | ||||||
| 					    ceph_vino(inode), | 					    ceph_vino(inode), | ||||||
| 					    offset, &len, | 					    offset, &len, | ||||||
| @ -782,7 +785,7 @@ get_more_pages: | |||||||
| 					    &inode->i_mtime, true, 1); | 					    &inode->i_mtime, true, 1); | ||||||
| 				max_pages = req->r_num_pages; | 				max_pages = req->r_num_pages; | ||||||
| 
 | 
 | ||||||
| 				alloc_page_vec(client, req); | 				alloc_page_vec(fsc, req); | ||||||
| 				req->r_callback = writepages_finish; | 				req->r_callback = writepages_finish; | ||||||
| 				req->r_inode = inode; | 				req->r_inode = inode; | ||||||
| 			} | 			} | ||||||
| @ -794,10 +797,10 @@ get_more_pages: | |||||||
| 			     inode, page, page->index); | 			     inode, page, page->index); | ||||||
| 
 | 
 | ||||||
| 			writeback_stat = | 			writeback_stat = | ||||||
| 			       atomic_long_inc_return(&client->writeback_count); | 			       atomic_long_inc_return(&fsc->writeback_count); | ||||||
| 			if (writeback_stat > CONGESTION_ON_THRESH( | 			if (writeback_stat > CONGESTION_ON_THRESH( | ||||||
| 				    client->mount_args->congestion_kb)) { | 				    fsc->mount_options->congestion_kb)) { | ||||||
| 				set_bdi_congested(&client->backing_dev_info, | 				set_bdi_congested(&fsc->backing_dev_info, | ||||||
| 						  BLK_RW_ASYNC); | 						  BLK_RW_ASYNC); | ||||||
| 			} | 			} | ||||||
| 
 | 
 | ||||||
| @ -846,7 +849,7 @@ get_more_pages: | |||||||
| 		op->payload_len = cpu_to_le32(len); | 		op->payload_len = cpu_to_le32(len); | ||||||
| 		req->r_request->hdr.data_len = cpu_to_le32(len); | 		req->r_request->hdr.data_len = cpu_to_le32(len); | ||||||
| 
 | 
 | ||||||
| 		ceph_osdc_start_request(&client->osdc, req, true); | 		ceph_osdc_start_request(&fsc->client->osdc, req, true); | ||||||
| 		req = NULL; | 		req = NULL; | ||||||
| 
 | 
 | ||||||
| 		/* continue? */ | 		/* continue? */ | ||||||
| @ -915,7 +918,7 @@ static int ceph_update_writeable_page(struct file *file, | |||||||
| { | { | ||||||
| 	struct inode *inode = file->f_dentry->d_inode; | 	struct inode *inode = file->f_dentry->d_inode; | ||||||
| 	struct ceph_inode_info *ci = ceph_inode(inode); | 	struct ceph_inode_info *ci = ceph_inode(inode); | ||||||
| 	struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; | 	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; | ||||||
| 	loff_t page_off = pos & PAGE_CACHE_MASK; | 	loff_t page_off = pos & PAGE_CACHE_MASK; | ||||||
| 	int pos_in_page = pos & ~PAGE_CACHE_MASK; | 	int pos_in_page = pos & ~PAGE_CACHE_MASK; | ||||||
| 	int end_in_page = pos_in_page + len; | 	int end_in_page = pos_in_page + len; | ||||||
| @ -1053,8 +1056,8 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, | |||||||
| 			  struct page *page, void *fsdata) | 			  struct page *page, void *fsdata) | ||||||
| { | { | ||||||
| 	struct inode *inode = file->f_dentry->d_inode; | 	struct inode *inode = file->f_dentry->d_inode; | ||||||
| 	struct ceph_client *client = ceph_inode_to_client(inode); | 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode); | ||||||
| 	struct ceph_mds_client *mdsc = &client->mdsc; | 	struct ceph_mds_client *mdsc = fsc->mdsc; | ||||||
| 	unsigned from = pos & (PAGE_CACHE_SIZE - 1); | 	unsigned from = pos & (PAGE_CACHE_SIZE - 1); | ||||||
| 	int check_cap = 0; | 	int check_cap = 0; | ||||||
| 
 | 
 | ||||||
| @ -1123,7 +1126,7 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||||||
| { | { | ||||||
| 	struct inode *inode = vma->vm_file->f_dentry->d_inode; | 	struct inode *inode = vma->vm_file->f_dentry->d_inode; | ||||||
| 	struct page *page = vmf->page; | 	struct page *page = vmf->page; | ||||||
| 	struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; | 	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; | ||||||
| 	loff_t off = page->index << PAGE_CACHE_SHIFT; | 	loff_t off = page->index << PAGE_CACHE_SHIFT; | ||||||
| 	loff_t size, len; | 	loff_t size, len; | ||||||
| 	int ret; | 	int ret; | ||||||
|  | |||||||
| @ -1,4 +1,4 @@ | |||||||
| #include "ceph_debug.h" | #include <linux/ceph/ceph_debug.h> | ||||||
| 
 | 
 | ||||||
| #include <linux/fs.h> | #include <linux/fs.h> | ||||||
| #include <linux/kernel.h> | #include <linux/kernel.h> | ||||||
| @ -9,8 +9,9 @@ | |||||||
| #include <linux/writeback.h> | #include <linux/writeback.h> | ||||||
| 
 | 
 | ||||||
| #include "super.h" | #include "super.h" | ||||||
| #include "decode.h" | #include "mds_client.h" | ||||||
| #include "messenger.h" | #include <linux/ceph/decode.h> | ||||||
|  | #include <linux/ceph/messenger.h> | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Capability management |  * Capability management | ||||||
| @ -287,11 +288,11 @@ void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap) | |||||||
| 	spin_unlock(&mdsc->caps_list_lock); | 	spin_unlock(&mdsc->caps_list_lock); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void ceph_reservation_status(struct ceph_client *client, | void ceph_reservation_status(struct ceph_fs_client *fsc, | ||||||
| 			     int *total, int *avail, int *used, int *reserved, | 			     int *total, int *avail, int *used, int *reserved, | ||||||
| 			     int *min) | 			     int *min) | ||||||
| { | { | ||||||
| 	struct ceph_mds_client *mdsc = &client->mdsc; | 	struct ceph_mds_client *mdsc = fsc->mdsc; | ||||||
| 
 | 
 | ||||||
| 	if (total) | 	if (total) | ||||||
| 		*total = mdsc->caps_total_count; | 		*total = mdsc->caps_total_count; | ||||||
| @ -399,7 +400,7 @@ static void __insert_cap_node(struct ceph_inode_info *ci, | |||||||
| static void __cap_set_timeouts(struct ceph_mds_client *mdsc, | static void __cap_set_timeouts(struct ceph_mds_client *mdsc, | ||||||
| 			       struct ceph_inode_info *ci) | 			       struct ceph_inode_info *ci) | ||||||
| { | { | ||||||
| 	struct ceph_mount_args *ma = mdsc->client->mount_args; | 	struct ceph_mount_options *ma = mdsc->fsc->mount_options; | ||||||
| 
 | 
 | ||||||
| 	ci->i_hold_caps_min = round_jiffies(jiffies + | 	ci->i_hold_caps_min = round_jiffies(jiffies + | ||||||
| 					    ma->caps_wanted_delay_min * HZ); | 					    ma->caps_wanted_delay_min * HZ); | ||||||
| @ -515,7 +516,7 @@ int ceph_add_cap(struct inode *inode, | |||||||
| 		 unsigned seq, unsigned mseq, u64 realmino, int flags, | 		 unsigned seq, unsigned mseq, u64 realmino, int flags, | ||||||
| 		 struct ceph_cap_reservation *caps_reservation) | 		 struct ceph_cap_reservation *caps_reservation) | ||||||
| { | { | ||||||
| 	struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; | 	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; | ||||||
| 	struct ceph_inode_info *ci = ceph_inode(inode); | 	struct ceph_inode_info *ci = ceph_inode(inode); | ||||||
| 	struct ceph_cap *new_cap = NULL; | 	struct ceph_cap *new_cap = NULL; | ||||||
| 	struct ceph_cap *cap; | 	struct ceph_cap *cap; | ||||||
| @ -873,7 +874,7 @@ void __ceph_remove_cap(struct ceph_cap *cap) | |||||||
| 	struct ceph_mds_session *session = cap->session; | 	struct ceph_mds_session *session = cap->session; | ||||||
| 	struct ceph_inode_info *ci = cap->ci; | 	struct ceph_inode_info *ci = cap->ci; | ||||||
| 	struct ceph_mds_client *mdsc = | 	struct ceph_mds_client *mdsc = | ||||||
| 		&ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; | 		ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; | ||||||
| 	int removed = 0; | 	int removed = 0; | ||||||
| 
 | 
 | ||||||
| 	dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); | 	dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); | ||||||
| @ -1210,7 +1211,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci, | |||||||
| 	int mds; | 	int mds; | ||||||
| 	struct ceph_cap_snap *capsnap; | 	struct ceph_cap_snap *capsnap; | ||||||
| 	u32 mseq; | 	u32 mseq; | ||||||
| 	struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; | 	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; | ||||||
| 	struct ceph_mds_session *session = NULL; /* if session != NULL, we hold
 | 	struct ceph_mds_session *session = NULL; /* if session != NULL, we hold
 | ||||||
| 						    session->s_mutex */ | 						    session->s_mutex */ | ||||||
| 	u64 next_follows = 0;  /* keep track of how far we've gotten through the
 | 	u64 next_follows = 0;  /* keep track of how far we've gotten through the
 | ||||||
| @ -1336,7 +1337,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci) | |||||||
| void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | ||||||
| { | { | ||||||
| 	struct ceph_mds_client *mdsc = | 	struct ceph_mds_client *mdsc = | ||||||
| 		&ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; | 		ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; | ||||||
| 	struct inode *inode = &ci->vfs_inode; | 	struct inode *inode = &ci->vfs_inode; | ||||||
| 	int was = ci->i_dirty_caps; | 	int was = ci->i_dirty_caps; | ||||||
| 	int dirty = 0; | 	int dirty = 0; | ||||||
| @ -1378,7 +1379,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | |||||||
| static int __mark_caps_flushing(struct inode *inode, | static int __mark_caps_flushing(struct inode *inode, | ||||||
| 				 struct ceph_mds_session *session) | 				 struct ceph_mds_session *session) | ||||||
| { | { | ||||||
| 	struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; | 	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; | ||||||
| 	struct ceph_inode_info *ci = ceph_inode(inode); | 	struct ceph_inode_info *ci = ceph_inode(inode); | ||||||
| 	int flushing; | 	int flushing; | ||||||
| 
 | 
 | ||||||
| @ -1416,17 +1417,6 @@ static int __mark_caps_flushing(struct inode *inode, | |||||||
| /*
 | /*
 | ||||||
|  * try to invalidate mapping pages without blocking. |  * try to invalidate mapping pages without blocking. | ||||||
|  */ |  */ | ||||||
| static int mapping_is_empty(struct address_space *mapping) |  | ||||||
| { |  | ||||||
| 	struct page *page = find_get_page(mapping, 0); |  | ||||||
| 
 |  | ||||||
| 	if (!page) |  | ||||||
| 		return 1; |  | ||||||
| 
 |  | ||||||
| 	put_page(page); |  | ||||||
| 	return 0; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static int try_nonblocking_invalidate(struct inode *inode) | static int try_nonblocking_invalidate(struct inode *inode) | ||||||
| { | { | ||||||
| 	struct ceph_inode_info *ci = ceph_inode(inode); | 	struct ceph_inode_info *ci = ceph_inode(inode); | ||||||
| @ -1436,7 +1426,7 @@ static int try_nonblocking_invalidate(struct inode *inode) | |||||||
| 	invalidate_mapping_pages(&inode->i_data, 0, -1); | 	invalidate_mapping_pages(&inode->i_data, 0, -1); | ||||||
| 	spin_lock(&inode->i_lock); | 	spin_lock(&inode->i_lock); | ||||||
| 
 | 
 | ||||||
| 	if (mapping_is_empty(&inode->i_data) && | 	if (inode->i_data.nrpages == 0 && | ||||||
| 	    invalidating_gen == ci->i_rdcache_gen) { | 	    invalidating_gen == ci->i_rdcache_gen) { | ||||||
| 		/* success. */ | 		/* success. */ | ||||||
| 		dout("try_nonblocking_invalidate %p success\n", inode); | 		dout("try_nonblocking_invalidate %p success\n", inode); | ||||||
| @ -1462,8 +1452,8 @@ static int try_nonblocking_invalidate(struct inode *inode) | |||||||
| void ceph_check_caps(struct ceph_inode_info *ci, int flags, | void ceph_check_caps(struct ceph_inode_info *ci, int flags, | ||||||
| 		     struct ceph_mds_session *session) | 		     struct ceph_mds_session *session) | ||||||
| { | { | ||||||
| 	struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); | 	struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode); | ||||||
| 	struct ceph_mds_client *mdsc = &client->mdsc; | 	struct ceph_mds_client *mdsc = fsc->mdsc; | ||||||
| 	struct inode *inode = &ci->vfs_inode; | 	struct inode *inode = &ci->vfs_inode; | ||||||
| 	struct ceph_cap *cap; | 	struct ceph_cap *cap; | ||||||
| 	int file_wanted, used; | 	int file_wanted, used; | ||||||
| @ -1533,7 +1523,7 @@ retry_locked: | |||||||
| 	 */ | 	 */ | ||||||
| 	if ((!is_delayed || mdsc->stopping) && | 	if ((!is_delayed || mdsc->stopping) && | ||||||
| 	    ci->i_wrbuffer_ref == 0 &&               /* no dirty pages... */ | 	    ci->i_wrbuffer_ref == 0 &&               /* no dirty pages... */ | ||||||
| 	    ci->i_rdcache_gen &&                     /* may have cached pages */ | 	    inode->i_data.nrpages &&                 /* have cached pages */ | ||||||
| 	    (file_wanted == 0 ||                     /* no open files */ | 	    (file_wanted == 0 ||                     /* no open files */ | ||||||
| 	     (revoking & (CEPH_CAP_FILE_CACHE| | 	     (revoking & (CEPH_CAP_FILE_CACHE| | ||||||
| 			  CEPH_CAP_FILE_LAZYIO))) && /*  or revoking cache */ | 			  CEPH_CAP_FILE_LAZYIO))) && /*  or revoking cache */ | ||||||
| @ -1706,7 +1696,7 @@ ack: | |||||||
| static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, | static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, | ||||||
| 			  unsigned *flush_tid) | 			  unsigned *flush_tid) | ||||||
| { | { | ||||||
| 	struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; | 	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; | ||||||
| 	struct ceph_inode_info *ci = ceph_inode(inode); | 	struct ceph_inode_info *ci = ceph_inode(inode); | ||||||
| 	int unlock_session = session ? 0 : 1; | 	int unlock_session = session ? 0 : 1; | ||||||
| 	int flushing = 0; | 	int flushing = 0; | ||||||
| @ -1872,7 +1862,7 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) | |||||||
| 				       caps_are_flushed(inode, flush_tid)); | 				       caps_are_flushed(inode, flush_tid)); | ||||||
| 	} else { | 	} else { | ||||||
| 		struct ceph_mds_client *mdsc = | 		struct ceph_mds_client *mdsc = | ||||||
| 			&ceph_sb_to_client(inode->i_sb)->mdsc; | 			ceph_sb_to_client(inode->i_sb)->mdsc; | ||||||
| 
 | 
 | ||||||
| 		spin_lock(&inode->i_lock); | 		spin_lock(&inode->i_lock); | ||||||
| 		if (__ceph_caps_dirty(ci)) | 		if (__ceph_caps_dirty(ci)) | ||||||
| @ -2465,7 +2455,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, | |||||||
| 	__releases(inode->i_lock) | 	__releases(inode->i_lock) | ||||||
| { | { | ||||||
| 	struct ceph_inode_info *ci = ceph_inode(inode); | 	struct ceph_inode_info *ci = ceph_inode(inode); | ||||||
| 	struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; | 	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; | ||||||
| 	unsigned seq = le32_to_cpu(m->seq); | 	unsigned seq = le32_to_cpu(m->seq); | ||||||
| 	int dirty = le32_to_cpu(m->dirty); | 	int dirty = le32_to_cpu(m->dirty); | ||||||
| 	int cleaned = 0; | 	int cleaned = 0; | ||||||
| @ -2713,7 +2703,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||||||
| 		      struct ceph_msg *msg) | 		      struct ceph_msg *msg) | ||||||
| { | { | ||||||
| 	struct ceph_mds_client *mdsc = session->s_mdsc; | 	struct ceph_mds_client *mdsc = session->s_mdsc; | ||||||
| 	struct super_block *sb = mdsc->client->sb; | 	struct super_block *sb = mdsc->fsc->sb; | ||||||
| 	struct inode *inode; | 	struct inode *inode; | ||||||
| 	struct ceph_cap *cap; | 	struct ceph_cap *cap; | ||||||
| 	struct ceph_mds_caps *h; | 	struct ceph_mds_caps *h; | ||||||
|  | |||||||
| @ -1,7 +1,8 @@ | |||||||
| /*
 | /*
 | ||||||
|  * Ceph 'frag' type |  * Ceph 'frag' type | ||||||
|  */ |  */ | ||||||
| #include "types.h" | #include <linux/module.h> | ||||||
|  | #include <linux/ceph/types.h> | ||||||
| 
 | 
 | ||||||
| int ceph_frag_compare(__u32 a, __u32 b) | int ceph_frag_compare(__u32 a, __u32 b) | ||||||
| { | { | ||||||
|  | |||||||
| @ -1,4 +1,4 @@ | |||||||
| #include "ceph_debug.h" | #include <linux/ceph/ceph_debug.h> | ||||||
| 
 | 
 | ||||||
| #include <linux/device.h> | #include <linux/device.h> | ||||||
| #include <linux/slab.h> | #include <linux/slab.h> | ||||||
| @ -7,143 +7,49 @@ | |||||||
| #include <linux/debugfs.h> | #include <linux/debugfs.h> | ||||||
| #include <linux/seq_file.h> | #include <linux/seq_file.h> | ||||||
| 
 | 
 | ||||||
|  | #include <linux/ceph/libceph.h> | ||||||
|  | #include <linux/ceph/mon_client.h> | ||||||
|  | #include <linux/ceph/auth.h> | ||||||
|  | #include <linux/ceph/debugfs.h> | ||||||
|  | 
 | ||||||
| #include "super.h" | #include "super.h" | ||||||
| #include "mds_client.h" |  | ||||||
| #include "mon_client.h" |  | ||||||
| #include "auth.h" |  | ||||||
| 
 | 
 | ||||||
| #ifdef CONFIG_DEBUG_FS | #ifdef CONFIG_DEBUG_FS | ||||||
| 
 | 
 | ||||||
| /*
 | #include "mds_client.h" | ||||||
|  * Implement /sys/kernel/debug/ceph fun |  | ||||||
|  * |  | ||||||
|  * /sys/kernel/debug/ceph/client*  - an instance of the ceph client |  | ||||||
|  *      .../osdmap      - current osdmap |  | ||||||
|  *      .../mdsmap      - current mdsmap |  | ||||||
|  *      .../monmap      - current monmap |  | ||||||
|  *      .../osdc        - active osd requests |  | ||||||
|  *      .../mdsc        - active mds requests |  | ||||||
|  *      .../monc        - mon client state |  | ||||||
|  *      .../dentry_lru  - dump contents of dentry lru |  | ||||||
|  *      .../caps        - expose cap (reservation) stats |  | ||||||
|  *      .../bdi         - symlink to ../../bdi/something |  | ||||||
|  */ |  | ||||||
| 
 |  | ||||||
| static struct dentry *ceph_debugfs_dir; |  | ||||||
| 
 |  | ||||||
| static int monmap_show(struct seq_file *s, void *p) |  | ||||||
| { |  | ||||||
| 	int i; |  | ||||||
| 	struct ceph_client *client = s->private; |  | ||||||
| 
 |  | ||||||
| 	if (client->monc.monmap == NULL) |  | ||||||
| 		return 0; |  | ||||||
| 
 |  | ||||||
| 	seq_printf(s, "epoch %d\n", client->monc.monmap->epoch); |  | ||||||
| 	for (i = 0; i < client->monc.monmap->num_mon; i++) { |  | ||||||
| 		struct ceph_entity_inst *inst = |  | ||||||
| 			&client->monc.monmap->mon_inst[i]; |  | ||||||
| 
 |  | ||||||
| 		seq_printf(s, "\t%s%lld\t%s\n", |  | ||||||
| 			   ENTITY_NAME(inst->name), |  | ||||||
| 			   pr_addr(&inst->addr.in_addr)); |  | ||||||
| 	} |  | ||||||
| 	return 0; |  | ||||||
| } |  | ||||||
| 
 | 
 | ||||||
| static int mdsmap_show(struct seq_file *s, void *p) | static int mdsmap_show(struct seq_file *s, void *p) | ||||||
| { | { | ||||||
| 	int i; | 	int i; | ||||||
| 	struct ceph_client *client = s->private; | 	struct ceph_fs_client *fsc = s->private; | ||||||
| 
 | 
 | ||||||
| 	if (client->mdsc.mdsmap == NULL) | 	if (fsc->mdsc == NULL || fsc->mdsc->mdsmap == NULL) | ||||||
| 		return 0; | 		return 0; | ||||||
| 	seq_printf(s, "epoch %d\n", client->mdsc.mdsmap->m_epoch); | 	seq_printf(s, "epoch %d\n", fsc->mdsc->mdsmap->m_epoch); | ||||||
| 	seq_printf(s, "root %d\n", client->mdsc.mdsmap->m_root); | 	seq_printf(s, "root %d\n", fsc->mdsc->mdsmap->m_root); | ||||||
| 	seq_printf(s, "session_timeout %d\n", | 	seq_printf(s, "session_timeout %d\n", | ||||||
| 		       client->mdsc.mdsmap->m_session_timeout); | 		       fsc->mdsc->mdsmap->m_session_timeout); | ||||||
| 	seq_printf(s, "session_autoclose %d\n", | 	seq_printf(s, "session_autoclose %d\n", | ||||||
| 		       client->mdsc.mdsmap->m_session_autoclose); | 		       fsc->mdsc->mdsmap->m_session_autoclose); | ||||||
| 	for (i = 0; i < client->mdsc.mdsmap->m_max_mds; i++) { | 	for (i = 0; i < fsc->mdsc->mdsmap->m_max_mds; i++) { | ||||||
| 		struct ceph_entity_addr *addr = | 		struct ceph_entity_addr *addr = | ||||||
| 			&client->mdsc.mdsmap->m_info[i].addr; | 			&fsc->mdsc->mdsmap->m_info[i].addr; | ||||||
| 		int state = client->mdsc.mdsmap->m_info[i].state; | 		int state = fsc->mdsc->mdsmap->m_info[i].state; | ||||||
| 
 | 
 | ||||||
| 		seq_printf(s, "\tmds%d\t%s\t(%s)\n", i, pr_addr(&addr->in_addr), | 		seq_printf(s, "\tmds%d\t%s\t(%s)\n", i, | ||||||
|  | 			       ceph_pr_addr(&addr->in_addr), | ||||||
| 			       ceph_mds_state_name(state)); | 			       ceph_mds_state_name(state)); | ||||||
| 	} | 	} | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int osdmap_show(struct seq_file *s, void *p) | /*
 | ||||||
| { |  * mdsc debugfs | ||||||
| 	int i; |  */ | ||||||
| 	struct ceph_client *client = s->private; |  | ||||||
| 	struct rb_node *n; |  | ||||||
| 
 |  | ||||||
| 	if (client->osdc.osdmap == NULL) |  | ||||||
| 		return 0; |  | ||||||
| 	seq_printf(s, "epoch %d\n", client->osdc.osdmap->epoch); |  | ||||||
| 	seq_printf(s, "flags%s%s\n", |  | ||||||
| 		   (client->osdc.osdmap->flags & CEPH_OSDMAP_NEARFULL) ? |  | ||||||
| 		   " NEARFULL" : "", |  | ||||||
| 		   (client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ? |  | ||||||
| 		   " FULL" : ""); |  | ||||||
| 	for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) { |  | ||||||
| 		struct ceph_pg_pool_info *pool = |  | ||||||
| 			rb_entry(n, struct ceph_pg_pool_info, node); |  | ||||||
| 		seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n", |  | ||||||
| 			   pool->id, pool->v.pg_num, pool->pg_num_mask, |  | ||||||
| 			   pool->v.lpg_num, pool->lpg_num_mask); |  | ||||||
| 	} |  | ||||||
| 	for (i = 0; i < client->osdc.osdmap->max_osd; i++) { |  | ||||||
| 		struct ceph_entity_addr *addr = |  | ||||||
| 			&client->osdc.osdmap->osd_addr[i]; |  | ||||||
| 		int state = client->osdc.osdmap->osd_state[i]; |  | ||||||
| 		char sb[64]; |  | ||||||
| 
 |  | ||||||
| 		seq_printf(s, "\tosd%d\t%s\t%3d%%\t(%s)\n", |  | ||||||
| 			   i, pr_addr(&addr->in_addr), |  | ||||||
| 			   ((client->osdc.osdmap->osd_weight[i]*100) >> 16), |  | ||||||
| 			   ceph_osdmap_state_str(sb, sizeof(sb), state)); |  | ||||||
| 	} |  | ||||||
| 	return 0; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static int monc_show(struct seq_file *s, void *p) |  | ||||||
| { |  | ||||||
| 	struct ceph_client *client = s->private; |  | ||||||
| 	struct ceph_mon_generic_request *req; |  | ||||||
| 	struct ceph_mon_client *monc = &client->monc; |  | ||||||
| 	struct rb_node *rp; |  | ||||||
| 
 |  | ||||||
| 	mutex_lock(&monc->mutex); |  | ||||||
| 
 |  | ||||||
| 	if (monc->have_mdsmap) |  | ||||||
| 		seq_printf(s, "have mdsmap %u\n", (unsigned)monc->have_mdsmap); |  | ||||||
| 	if (monc->have_osdmap) |  | ||||||
| 		seq_printf(s, "have osdmap %u\n", (unsigned)monc->have_osdmap); |  | ||||||
| 	if (monc->want_next_osdmap) |  | ||||||
| 		seq_printf(s, "want next osdmap\n"); |  | ||||||
| 
 |  | ||||||
| 	for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) { |  | ||||||
| 		__u16 op; |  | ||||||
| 		req = rb_entry(rp, struct ceph_mon_generic_request, node); |  | ||||||
| 		op = le16_to_cpu(req->request->hdr.type); |  | ||||||
| 		if (op == CEPH_MSG_STATFS) |  | ||||||
| 			seq_printf(s, "%lld statfs\n", req->tid); |  | ||||||
| 		else |  | ||||||
| 			seq_printf(s, "%lld unknown\n", req->tid); |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	mutex_unlock(&monc->mutex); |  | ||||||
| 	return 0; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static int mdsc_show(struct seq_file *s, void *p) | static int mdsc_show(struct seq_file *s, void *p) | ||||||
| { | { | ||||||
| 	struct ceph_client *client = s->private; | 	struct ceph_fs_client *fsc = s->private; | ||||||
| 	struct ceph_mds_client *mdsc = &client->mdsc; | 	struct ceph_mds_client *mdsc = fsc->mdsc; | ||||||
| 	struct ceph_mds_request *req; | 	struct ceph_mds_request *req; | ||||||
| 	struct rb_node *rp; | 	struct rb_node *rp; | ||||||
| 	int pathlen; | 	int pathlen; | ||||||
| @ -214,61 +120,12 @@ static int mdsc_show(struct seq_file *s, void *p) | |||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int osdc_show(struct seq_file *s, void *pp) |  | ||||||
| { |  | ||||||
| 	struct ceph_client *client = s->private; |  | ||||||
| 	struct ceph_osd_client *osdc = &client->osdc; |  | ||||||
| 	struct rb_node *p; |  | ||||||
| 
 |  | ||||||
| 	mutex_lock(&osdc->request_mutex); |  | ||||||
| 	for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { |  | ||||||
| 		struct ceph_osd_request *req; |  | ||||||
| 		struct ceph_osd_request_head *head; |  | ||||||
| 		struct ceph_osd_op *op; |  | ||||||
| 		int num_ops; |  | ||||||
| 		int opcode, olen; |  | ||||||
| 		int i; |  | ||||||
| 
 |  | ||||||
| 		req = rb_entry(p, struct ceph_osd_request, r_node); |  | ||||||
| 
 |  | ||||||
| 		seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid, |  | ||||||
| 			   req->r_osd ? req->r_osd->o_osd : -1, |  | ||||||
| 			   le32_to_cpu(req->r_pgid.pool), |  | ||||||
| 			   le16_to_cpu(req->r_pgid.ps)); |  | ||||||
| 
 |  | ||||||
| 		head = req->r_request->front.iov_base; |  | ||||||
| 		op = (void *)(head + 1); |  | ||||||
| 
 |  | ||||||
| 		num_ops = le16_to_cpu(head->num_ops); |  | ||||||
| 		olen = le32_to_cpu(head->object_len); |  | ||||||
| 		seq_printf(s, "%.*s", olen, |  | ||||||
| 			   (const char *)(head->ops + num_ops)); |  | ||||||
| 
 |  | ||||||
| 		if (req->r_reassert_version.epoch) |  | ||||||
| 			seq_printf(s, "\t%u'%llu", |  | ||||||
| 			   (unsigned)le32_to_cpu(req->r_reassert_version.epoch), |  | ||||||
| 			   le64_to_cpu(req->r_reassert_version.version)); |  | ||||||
| 		else |  | ||||||
| 			seq_printf(s, "\t"); |  | ||||||
| 
 |  | ||||||
| 		for (i = 0; i < num_ops; i++) { |  | ||||||
| 			opcode = le16_to_cpu(op->op); |  | ||||||
| 			seq_printf(s, "\t%s", ceph_osd_op_name(opcode)); |  | ||||||
| 			op++; |  | ||||||
| 		} |  | ||||||
| 
 |  | ||||||
| 		seq_printf(s, "\n"); |  | ||||||
| 	} |  | ||||||
| 	mutex_unlock(&osdc->request_mutex); |  | ||||||
| 	return 0; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static int caps_show(struct seq_file *s, void *p) | static int caps_show(struct seq_file *s, void *p) | ||||||
| { | { | ||||||
| 	struct ceph_client *client = s->private; | 	struct ceph_fs_client *fsc = s->private; | ||||||
| 	int total, avail, used, reserved, min; | 	int total, avail, used, reserved, min; | ||||||
| 
 | 
 | ||||||
| 	ceph_reservation_status(client, &total, &avail, &used, &reserved, &min); | 	ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min); | ||||||
| 	seq_printf(s, "total\t\t%d\n" | 	seq_printf(s, "total\t\t%d\n" | ||||||
| 		   "avail\t\t%d\n" | 		   "avail\t\t%d\n" | ||||||
| 		   "used\t\t%d\n" | 		   "used\t\t%d\n" | ||||||
| @ -280,8 +137,8 @@ static int caps_show(struct seq_file *s, void *p) | |||||||
| 
 | 
 | ||||||
| static int dentry_lru_show(struct seq_file *s, void *ptr) | static int dentry_lru_show(struct seq_file *s, void *ptr) | ||||||
| { | { | ||||||
| 	struct ceph_client *client = s->private; | 	struct ceph_fs_client *fsc = s->private; | ||||||
| 	struct ceph_mds_client *mdsc = &client->mdsc; | 	struct ceph_mds_client *mdsc = fsc->mdsc; | ||||||
| 	struct ceph_dentry_info *di; | 	struct ceph_dentry_info *di; | ||||||
| 
 | 
 | ||||||
| 	spin_lock(&mdsc->dentry_lru_lock); | 	spin_lock(&mdsc->dentry_lru_lock); | ||||||
| @ -295,199 +152,124 @@ static int dentry_lru_show(struct seq_file *s, void *ptr) | |||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #define DEFINE_SHOW_FUNC(name)						\ | CEPH_DEFINE_SHOW_FUNC(mdsmap_show) | ||||||
| static int name##_open(struct inode *inode, struct file *file)		\ | CEPH_DEFINE_SHOW_FUNC(mdsc_show) | ||||||
| {									\ | CEPH_DEFINE_SHOW_FUNC(caps_show) | ||||||
| 	struct seq_file *sf;						\ | CEPH_DEFINE_SHOW_FUNC(dentry_lru_show) | ||||||
| 	int ret;							\ |  | ||||||
| 									\ |  | ||||||
| 	ret = single_open(file, name, NULL);				\ |  | ||||||
| 	sf = file->private_data;					\ |  | ||||||
| 	sf->private = inode->i_private;					\ |  | ||||||
| 	return ret;							\ |  | ||||||
| }									\ |  | ||||||
| 									\ |  | ||||||
| static const struct file_operations name##_fops = {			\ |  | ||||||
| 	.open		= name##_open,					\ |  | ||||||
| 	.read		= seq_read,					\ |  | ||||||
| 	.llseek		= seq_lseek,					\ |  | ||||||
| 	.release	= single_release,				\ |  | ||||||
| }; |  | ||||||
| 
 | 
 | ||||||
| DEFINE_SHOW_FUNC(monmap_show) |  | ||||||
| DEFINE_SHOW_FUNC(mdsmap_show) |  | ||||||
| DEFINE_SHOW_FUNC(osdmap_show) |  | ||||||
| DEFINE_SHOW_FUNC(monc_show) |  | ||||||
| DEFINE_SHOW_FUNC(mdsc_show) |  | ||||||
| DEFINE_SHOW_FUNC(osdc_show) |  | ||||||
| DEFINE_SHOW_FUNC(dentry_lru_show) |  | ||||||
| DEFINE_SHOW_FUNC(caps_show) |  | ||||||
| 
 | 
 | ||||||
|  | /*
 | ||||||
|  |  * debugfs | ||||||
|  |  */ | ||||||
| static int congestion_kb_set(void *data, u64 val) | static int congestion_kb_set(void *data, u64 val) | ||||||
| { | { | ||||||
| 	struct ceph_client *client = (struct ceph_client *)data; | 	struct ceph_fs_client *fsc = (struct ceph_fs_client *)data; | ||||||
| 
 |  | ||||||
| 	if (client) |  | ||||||
| 		client->mount_args->congestion_kb = (int)val; |  | ||||||
| 
 | 
 | ||||||
|  | 	fsc->mount_options->congestion_kb = (int)val; | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int congestion_kb_get(void *data, u64 *val) | static int congestion_kb_get(void *data, u64 *val) | ||||||
| { | { | ||||||
| 	struct ceph_client *client = (struct ceph_client *)data; | 	struct ceph_fs_client *fsc = (struct ceph_fs_client *)data; | ||||||
| 
 |  | ||||||
| 	if (client) |  | ||||||
| 		*val = (u64)client->mount_args->congestion_kb; |  | ||||||
| 
 | 
 | ||||||
|  | 	*val = (u64)fsc->mount_options->congestion_kb; | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
| DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get, | DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get, | ||||||
| 			congestion_kb_set, "%llu\n"); | 			congestion_kb_set, "%llu\n"); | ||||||
| 
 | 
 | ||||||
| int __init ceph_debugfs_init(void) | 
 | ||||||
|  | void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) | ||||||
| { | { | ||||||
| 	ceph_debugfs_dir = debugfs_create_dir("ceph", NULL); | 	dout("ceph_fs_debugfs_cleanup\n"); | ||||||
| 	if (!ceph_debugfs_dir) | 	debugfs_remove(fsc->debugfs_bdi); | ||||||
| 		return -ENOMEM; | 	debugfs_remove(fsc->debugfs_congestion_kb); | ||||||
| 	return 0; | 	debugfs_remove(fsc->debugfs_mdsmap); | ||||||
|  | 	debugfs_remove(fsc->debugfs_caps); | ||||||
|  | 	debugfs_remove(fsc->debugfs_mdsc); | ||||||
|  | 	debugfs_remove(fsc->debugfs_dentry_lru); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void ceph_debugfs_cleanup(void) | int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) | ||||||
| { | { | ||||||
| 	debugfs_remove(ceph_debugfs_dir); | 	char name[100]; | ||||||
| } | 	int err = -ENOMEM; | ||||||
| 
 | 
 | ||||||
| int ceph_debugfs_client_init(struct ceph_client *client) | 	dout("ceph_fs_debugfs_init\n"); | ||||||
| { | 	fsc->debugfs_congestion_kb = | ||||||
| 	int ret = 0; |  | ||||||
| 	char name[80]; |  | ||||||
| 
 |  | ||||||
| 	snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid, |  | ||||||
| 		 client->monc.auth->global_id); |  | ||||||
| 
 |  | ||||||
| 	client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); |  | ||||||
| 	if (!client->debugfs_dir) |  | ||||||
| 		goto out; |  | ||||||
| 
 |  | ||||||
| 	client->monc.debugfs_file = debugfs_create_file("monc", |  | ||||||
| 						      0600, |  | ||||||
| 						      client->debugfs_dir, |  | ||||||
| 						      client, |  | ||||||
| 						      &monc_show_fops); |  | ||||||
| 	if (!client->monc.debugfs_file) |  | ||||||
| 		goto out; |  | ||||||
| 
 |  | ||||||
| 	client->mdsc.debugfs_file = debugfs_create_file("mdsc", |  | ||||||
| 						      0600, |  | ||||||
| 						      client->debugfs_dir, |  | ||||||
| 						      client, |  | ||||||
| 						      &mdsc_show_fops); |  | ||||||
| 	if (!client->mdsc.debugfs_file) |  | ||||||
| 		goto out; |  | ||||||
| 
 |  | ||||||
| 	client->osdc.debugfs_file = debugfs_create_file("osdc", |  | ||||||
| 						      0600, |  | ||||||
| 						      client->debugfs_dir, |  | ||||||
| 						      client, |  | ||||||
| 						      &osdc_show_fops); |  | ||||||
| 	if (!client->osdc.debugfs_file) |  | ||||||
| 		goto out; |  | ||||||
| 
 |  | ||||||
| 	client->debugfs_monmap = debugfs_create_file("monmap", |  | ||||||
| 					0600, |  | ||||||
| 					client->debugfs_dir, |  | ||||||
| 					client, |  | ||||||
| 					&monmap_show_fops); |  | ||||||
| 	if (!client->debugfs_monmap) |  | ||||||
| 		goto out; |  | ||||||
| 
 |  | ||||||
| 	client->debugfs_mdsmap = debugfs_create_file("mdsmap", |  | ||||||
| 					0600, |  | ||||||
| 					client->debugfs_dir, |  | ||||||
| 					client, |  | ||||||
| 					&mdsmap_show_fops); |  | ||||||
| 	if (!client->debugfs_mdsmap) |  | ||||||
| 		goto out; |  | ||||||
| 
 |  | ||||||
| 	client->debugfs_osdmap = debugfs_create_file("osdmap", |  | ||||||
| 					0600, |  | ||||||
| 					client->debugfs_dir, |  | ||||||
| 					client, |  | ||||||
| 					&osdmap_show_fops); |  | ||||||
| 	if (!client->debugfs_osdmap) |  | ||||||
| 		goto out; |  | ||||||
| 
 |  | ||||||
| 	client->debugfs_dentry_lru = debugfs_create_file("dentry_lru", |  | ||||||
| 					0600, |  | ||||||
| 					client->debugfs_dir, |  | ||||||
| 					client, |  | ||||||
| 					&dentry_lru_show_fops); |  | ||||||
| 	if (!client->debugfs_dentry_lru) |  | ||||||
| 		goto out; |  | ||||||
| 
 |  | ||||||
| 	client->debugfs_caps = debugfs_create_file("caps", |  | ||||||
| 						   0400, |  | ||||||
| 						   client->debugfs_dir, |  | ||||||
| 						   client, |  | ||||||
| 						   &caps_show_fops); |  | ||||||
| 	if (!client->debugfs_caps) |  | ||||||
| 		goto out; |  | ||||||
| 
 |  | ||||||
| 	client->debugfs_congestion_kb = |  | ||||||
| 		debugfs_create_file("writeback_congestion_kb", | 		debugfs_create_file("writeback_congestion_kb", | ||||||
| 				    0600, | 				    0600, | ||||||
| 				    client->debugfs_dir, | 				    fsc->client->debugfs_dir, | ||||||
| 				    client, | 				    fsc, | ||||||
| 				    &congestion_kb_fops); | 				    &congestion_kb_fops); | ||||||
| 	if (!client->debugfs_congestion_kb) | 	if (!fsc->debugfs_congestion_kb) | ||||||
| 		goto out; | 		goto out; | ||||||
| 
 | 
 | ||||||
| 	sprintf(name, "../../bdi/%s", dev_name(client->sb->s_bdi->dev)); | 	dout("a\n"); | ||||||
| 	client->debugfs_bdi = debugfs_create_symlink("bdi", client->debugfs_dir, | 
 | ||||||
| 						     name); | 	snprintf(name, sizeof(name), "../../bdi/%s", | ||||||
|  | 		 dev_name(fsc->backing_dev_info.dev)); | ||||||
|  | 	fsc->debugfs_bdi = | ||||||
|  | 		debugfs_create_symlink("bdi", | ||||||
|  | 				       fsc->client->debugfs_dir, | ||||||
|  | 				       name); | ||||||
|  | 	if (!fsc->debugfs_bdi) | ||||||
|  | 		goto out; | ||||||
|  | 
 | ||||||
|  | 	dout("b\n"); | ||||||
|  | 	fsc->debugfs_mdsmap = debugfs_create_file("mdsmap", | ||||||
|  | 					0600, | ||||||
|  | 					fsc->client->debugfs_dir, | ||||||
|  | 					fsc, | ||||||
|  | 					&mdsmap_show_fops); | ||||||
|  | 	if (!fsc->debugfs_mdsmap) | ||||||
|  | 		goto out; | ||||||
|  | 
 | ||||||
|  | 	dout("ca\n"); | ||||||
|  | 	fsc->debugfs_mdsc = debugfs_create_file("mdsc", | ||||||
|  | 						0600, | ||||||
|  | 						fsc->client->debugfs_dir, | ||||||
|  | 						fsc, | ||||||
|  | 						&mdsc_show_fops); | ||||||
|  | 	if (!fsc->debugfs_mdsc) | ||||||
|  | 		goto out; | ||||||
|  | 
 | ||||||
|  | 	dout("da\n"); | ||||||
|  | 	fsc->debugfs_caps = debugfs_create_file("caps", | ||||||
|  | 						   0400, | ||||||
|  | 						   fsc->client->debugfs_dir, | ||||||
|  | 						   fsc, | ||||||
|  | 						   &caps_show_fops); | ||||||
|  | 	if (!fsc->debugfs_caps) | ||||||
|  | 		goto out; | ||||||
|  | 
 | ||||||
|  | 	dout("ea\n"); | ||||||
|  | 	fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru", | ||||||
|  | 					0600, | ||||||
|  | 					fsc->client->debugfs_dir, | ||||||
|  | 					fsc, | ||||||
|  | 					&dentry_lru_show_fops); | ||||||
|  | 	if (!fsc->debugfs_dentry_lru) | ||||||
|  | 		goto out; | ||||||
| 
 | 
 | ||||||
| 	return 0; | 	return 0; | ||||||
| 
 | 
 | ||||||
| out: | out: | ||||||
| 	ceph_debugfs_client_cleanup(client); | 	ceph_fs_debugfs_cleanup(fsc); | ||||||
| 	return ret; | 	return err; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void ceph_debugfs_client_cleanup(struct ceph_client *client) |  | ||||||
| { |  | ||||||
| 	debugfs_remove(client->debugfs_bdi); |  | ||||||
| 	debugfs_remove(client->debugfs_caps); |  | ||||||
| 	debugfs_remove(client->debugfs_dentry_lru); |  | ||||||
| 	debugfs_remove(client->debugfs_osdmap); |  | ||||||
| 	debugfs_remove(client->debugfs_mdsmap); |  | ||||||
| 	debugfs_remove(client->debugfs_monmap); |  | ||||||
| 	debugfs_remove(client->osdc.debugfs_file); |  | ||||||
| 	debugfs_remove(client->mdsc.debugfs_file); |  | ||||||
| 	debugfs_remove(client->monc.debugfs_file); |  | ||||||
| 	debugfs_remove(client->debugfs_congestion_kb); |  | ||||||
| 	debugfs_remove(client->debugfs_dir); |  | ||||||
| } |  | ||||||
| 
 | 
 | ||||||
| #else  /* CONFIG_DEBUG_FS */ | #else  /* CONFIG_DEBUG_FS */ | ||||||
| 
 | 
 | ||||||
| int __init ceph_debugfs_init(void) | int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) | ||||||
| { | { | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void ceph_debugfs_cleanup(void) | void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) | ||||||
| { |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| int ceph_debugfs_client_init(struct ceph_client *client) |  | ||||||
| { |  | ||||||
| 	return 0; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ceph_debugfs_client_cleanup(struct ceph_client *client) |  | ||||||
| { | { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -1,4 +1,4 @@ | |||||||
| #include "ceph_debug.h" | #include <linux/ceph/ceph_debug.h> | ||||||
| 
 | 
 | ||||||
| #include <linux/spinlock.h> | #include <linux/spinlock.h> | ||||||
| #include <linux/fs_struct.h> | #include <linux/fs_struct.h> | ||||||
| @ -7,6 +7,7 @@ | |||||||
| #include <linux/sched.h> | #include <linux/sched.h> | ||||||
| 
 | 
 | ||||||
| #include "super.h" | #include "super.h" | ||||||
|  | #include "mds_client.h" | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Directory operations: readdir, lookup, create, link, unlink, |  * Directory operations: readdir, lookup, create, link, unlink, | ||||||
| @ -94,10 +95,7 @@ static unsigned fpos_off(loff_t p) | |||||||
|  */ |  */ | ||||||
| static int __dcache_readdir(struct file *filp, | static int __dcache_readdir(struct file *filp, | ||||||
| 			    void *dirent, filldir_t filldir) | 			    void *dirent, filldir_t filldir) | ||||||
| 		__releases(inode->i_lock) |  | ||||||
| 		__acquires(inode->i_lock) |  | ||||||
| { | { | ||||||
| 	struct inode *inode = filp->f_dentry->d_inode; |  | ||||||
| 	struct ceph_file_info *fi = filp->private_data; | 	struct ceph_file_info *fi = filp->private_data; | ||||||
| 	struct dentry *parent = filp->f_dentry; | 	struct dentry *parent = filp->f_dentry; | ||||||
| 	struct inode *dir = parent->d_inode; | 	struct inode *dir = parent->d_inode; | ||||||
| @ -153,7 +151,6 @@ more: | |||||||
| 
 | 
 | ||||||
| 	atomic_inc(&dentry->d_count); | 	atomic_inc(&dentry->d_count); | ||||||
| 	spin_unlock(&dcache_lock); | 	spin_unlock(&dcache_lock); | ||||||
| 	spin_unlock(&inode->i_lock); |  | ||||||
| 
 | 
 | ||||||
| 	dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos, | 	dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos, | ||||||
| 	     dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); | 	     dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); | ||||||
| @ -171,35 +168,30 @@ more: | |||||||
| 		} else { | 		} else { | ||||||
| 			dput(last); | 			dput(last); | ||||||
| 		} | 		} | ||||||
| 		last = NULL; |  | ||||||
| 	} | 	} | ||||||
| 
 |  | ||||||
| 	spin_lock(&inode->i_lock); |  | ||||||
| 	spin_lock(&dcache_lock); |  | ||||||
| 
 |  | ||||||
| 	last = dentry; | 	last = dentry; | ||||||
| 
 | 
 | ||||||
| 	if (err < 0) | 	if (err < 0) | ||||||
| 		goto out_unlock; | 		goto out; | ||||||
| 
 | 
 | ||||||
| 	p = p->prev; |  | ||||||
| 	filp->f_pos++; | 	filp->f_pos++; | ||||||
| 
 | 
 | ||||||
| 	/* make sure a dentry wasn't dropped while we didn't have dcache_lock */ | 	/* make sure a dentry wasn't dropped while we didn't have dcache_lock */ | ||||||
| 	if ((ceph_inode(dir)->i_ceph_flags & CEPH_I_COMPLETE)) | 	if (!ceph_i_test(dir, CEPH_I_COMPLETE)) { | ||||||
| 		goto more; | 		dout(" lost I_COMPLETE on %p; falling back to mds\n", dir); | ||||||
| 	dout(" lost I_COMPLETE on %p; falling back to mds\n", dir); | 		err = -EAGAIN; | ||||||
| 	err = -EAGAIN; | 		goto out; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	spin_lock(&dcache_lock); | ||||||
|  | 	p = p->prev;	/* advance to next dentry */ | ||||||
|  | 	goto more; | ||||||
| 
 | 
 | ||||||
| out_unlock: | out_unlock: | ||||||
| 	spin_unlock(&dcache_lock); | 	spin_unlock(&dcache_lock); | ||||||
| 
 | out: | ||||||
| 	if (last) { | 	if (last) | ||||||
| 		spin_unlock(&inode->i_lock); |  | ||||||
| 		dput(last); | 		dput(last); | ||||||
| 		spin_lock(&inode->i_lock); |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	return err; | 	return err; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -227,15 +219,15 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||||||
| 	struct ceph_file_info *fi = filp->private_data; | 	struct ceph_file_info *fi = filp->private_data; | ||||||
| 	struct inode *inode = filp->f_dentry->d_inode; | 	struct inode *inode = filp->f_dentry->d_inode; | ||||||
| 	struct ceph_inode_info *ci = ceph_inode(inode); | 	struct ceph_inode_info *ci = ceph_inode(inode); | ||||||
| 	struct ceph_client *client = ceph_inode_to_client(inode); | 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode); | ||||||
| 	struct ceph_mds_client *mdsc = &client->mdsc; | 	struct ceph_mds_client *mdsc = fsc->mdsc; | ||||||
| 	unsigned frag = fpos_frag(filp->f_pos); | 	unsigned frag = fpos_frag(filp->f_pos); | ||||||
| 	int off = fpos_off(filp->f_pos); | 	int off = fpos_off(filp->f_pos); | ||||||
| 	int err; | 	int err; | ||||||
| 	u32 ftype; | 	u32 ftype; | ||||||
| 	struct ceph_mds_reply_info_parsed *rinfo; | 	struct ceph_mds_reply_info_parsed *rinfo; | ||||||
| 	const int max_entries = client->mount_args->max_readdir; | 	const int max_entries = fsc->mount_options->max_readdir; | ||||||
| 	const int max_bytes = client->mount_args->max_readdir_bytes; | 	const int max_bytes = fsc->mount_options->max_readdir_bytes; | ||||||
| 
 | 
 | ||||||
| 	dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); | 	dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); | ||||||
| 	if (fi->at_end) | 	if (fi->at_end) | ||||||
| @ -267,17 +259,17 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||||||
| 	/* can we use the dcache? */ | 	/* can we use the dcache? */ | ||||||
| 	spin_lock(&inode->i_lock); | 	spin_lock(&inode->i_lock); | ||||||
| 	if ((filp->f_pos == 2 || fi->dentry) && | 	if ((filp->f_pos == 2 || fi->dentry) && | ||||||
| 	    !ceph_test_opt(client, NOASYNCREADDIR) && | 	    !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && | ||||||
| 	    ceph_snap(inode) != CEPH_SNAPDIR && | 	    ceph_snap(inode) != CEPH_SNAPDIR && | ||||||
| 	    (ci->i_ceph_flags & CEPH_I_COMPLETE) && | 	    (ci->i_ceph_flags & CEPH_I_COMPLETE) && | ||||||
| 	    __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { | 	    __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { | ||||||
|  | 		spin_unlock(&inode->i_lock); | ||||||
| 		err = __dcache_readdir(filp, dirent, filldir); | 		err = __dcache_readdir(filp, dirent, filldir); | ||||||
| 		if (err != -EAGAIN) { | 		if (err != -EAGAIN) | ||||||
| 			spin_unlock(&inode->i_lock); |  | ||||||
| 			return err; | 			return err; | ||||||
| 		} | 	} else { | ||||||
|  | 		spin_unlock(&inode->i_lock); | ||||||
| 	} | 	} | ||||||
| 	spin_unlock(&inode->i_lock); |  | ||||||
| 	if (fi->dentry) { | 	if (fi->dentry) { | ||||||
| 		err = note_last_dentry(fi, fi->dentry->d_name.name, | 		err = note_last_dentry(fi, fi->dentry->d_name.name, | ||||||
| 				       fi->dentry->d_name.len); | 				       fi->dentry->d_name.len); | ||||||
| @ -487,14 +479,13 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin) | |||||||
| struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, | struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, | ||||||
| 				  struct dentry *dentry, int err) | 				  struct dentry *dentry, int err) | ||||||
| { | { | ||||||
| 	struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); | 	struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); | ||||||
| 	struct inode *parent = dentry->d_parent->d_inode; | 	struct inode *parent = dentry->d_parent->d_inode; | ||||||
| 
 | 
 | ||||||
| 	/* .snap dir? */ | 	/* .snap dir? */ | ||||||
| 	if (err == -ENOENT && | 	if (err == -ENOENT && | ||||||
| 	    ceph_vino(parent).ino != CEPH_INO_ROOT && /* no .snap in root dir */ |  | ||||||
| 	    strcmp(dentry->d_name.name, | 	    strcmp(dentry->d_name.name, | ||||||
| 		   client->mount_args->snapdir_name) == 0) { | 		   fsc->mount_options->snapdir_name) == 0) { | ||||||
| 		struct inode *inode = ceph_get_snapdir(parent); | 		struct inode *inode = ceph_get_snapdir(parent); | ||||||
| 		dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n", | 		dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n", | ||||||
| 		     dentry, dentry->d_name.len, dentry->d_name.name, inode); | 		     dentry, dentry->d_name.len, dentry->d_name.name, inode); | ||||||
| @ -539,8 +530,8 @@ static int is_root_ceph_dentry(struct inode *inode, struct dentry *dentry) | |||||||
| static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, | static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, | ||||||
| 				  struct nameidata *nd) | 				  struct nameidata *nd) | ||||||
| { | { | ||||||
| 	struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); | ||||||
| 	struct ceph_mds_client *mdsc = &client->mdsc; | 	struct ceph_mds_client *mdsc = fsc->mdsc; | ||||||
| 	struct ceph_mds_request *req; | 	struct ceph_mds_request *req; | ||||||
| 	int op; | 	int op; | ||||||
| 	int err; | 	int err; | ||||||
| @ -572,7 +563,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, | |||||||
| 		spin_lock(&dir->i_lock); | 		spin_lock(&dir->i_lock); | ||||||
| 		dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags); | 		dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags); | ||||||
| 		if (strncmp(dentry->d_name.name, | 		if (strncmp(dentry->d_name.name, | ||||||
| 			    client->mount_args->snapdir_name, | 			    fsc->mount_options->snapdir_name, | ||||||
| 			    dentry->d_name.len) && | 			    dentry->d_name.len) && | ||||||
| 		    !is_root_ceph_dentry(dir, dentry) && | 		    !is_root_ceph_dentry(dir, dentry) && | ||||||
| 		    (ci->i_ceph_flags & CEPH_I_COMPLETE) && | 		    (ci->i_ceph_flags & CEPH_I_COMPLETE) && | ||||||
| @ -629,8 +620,8 @@ int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry) | |||||||
| static int ceph_mknod(struct inode *dir, struct dentry *dentry, | static int ceph_mknod(struct inode *dir, struct dentry *dentry, | ||||||
| 		      int mode, dev_t rdev) | 		      int mode, dev_t rdev) | ||||||
| { | { | ||||||
| 	struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); | ||||||
| 	struct ceph_mds_client *mdsc = &client->mdsc; | 	struct ceph_mds_client *mdsc = fsc->mdsc; | ||||||
| 	struct ceph_mds_request *req; | 	struct ceph_mds_request *req; | ||||||
| 	int err; | 	int err; | ||||||
| 
 | 
 | ||||||
| @ -685,8 +676,8 @@ static int ceph_create(struct inode *dir, struct dentry *dentry, int mode, | |||||||
| static int ceph_symlink(struct inode *dir, struct dentry *dentry, | static int ceph_symlink(struct inode *dir, struct dentry *dentry, | ||||||
| 			    const char *dest) | 			    const char *dest) | ||||||
| { | { | ||||||
| 	struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); | ||||||
| 	struct ceph_mds_client *mdsc = &client->mdsc; | 	struct ceph_mds_client *mdsc = fsc->mdsc; | ||||||
| 	struct ceph_mds_request *req; | 	struct ceph_mds_request *req; | ||||||
| 	int err; | 	int err; | ||||||
| 
 | 
 | ||||||
| @ -716,8 +707,8 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, | |||||||
| 
 | 
 | ||||||
| static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode) | static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode) | ||||||
| { | { | ||||||
| 	struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); | ||||||
| 	struct ceph_mds_client *mdsc = &client->mdsc; | 	struct ceph_mds_client *mdsc = fsc->mdsc; | ||||||
| 	struct ceph_mds_request *req; | 	struct ceph_mds_request *req; | ||||||
| 	int err = -EROFS; | 	int err = -EROFS; | ||||||
| 	int op; | 	int op; | ||||||
| @ -758,8 +749,8 @@ out: | |||||||
| static int ceph_link(struct dentry *old_dentry, struct inode *dir, | static int ceph_link(struct dentry *old_dentry, struct inode *dir, | ||||||
| 		     struct dentry *dentry) | 		     struct dentry *dentry) | ||||||
| { | { | ||||||
| 	struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); | ||||||
| 	struct ceph_mds_client *mdsc = &client->mdsc; | 	struct ceph_mds_client *mdsc = fsc->mdsc; | ||||||
| 	struct ceph_mds_request *req; | 	struct ceph_mds_request *req; | ||||||
| 	int err; | 	int err; | ||||||
| 
 | 
 | ||||||
| @ -813,8 +804,8 @@ static int drop_caps_for_unlink(struct inode *inode) | |||||||
|  */ |  */ | ||||||
| static int ceph_unlink(struct inode *dir, struct dentry *dentry) | static int ceph_unlink(struct inode *dir, struct dentry *dentry) | ||||||
| { | { | ||||||
| 	struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); | ||||||
| 	struct ceph_mds_client *mdsc = &client->mdsc; | 	struct ceph_mds_client *mdsc = fsc->mdsc; | ||||||
| 	struct inode *inode = dentry->d_inode; | 	struct inode *inode = dentry->d_inode; | ||||||
| 	struct ceph_mds_request *req; | 	struct ceph_mds_request *req; | ||||||
| 	int err = -EROFS; | 	int err = -EROFS; | ||||||
| @ -854,8 +845,8 @@ out: | |||||||
| static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, | static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, | ||||||
| 		       struct inode *new_dir, struct dentry *new_dentry) | 		       struct inode *new_dir, struct dentry *new_dentry) | ||||||
| { | { | ||||||
| 	struct ceph_client *client = ceph_sb_to_client(old_dir->i_sb); | 	struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb); | ||||||
| 	struct ceph_mds_client *mdsc = &client->mdsc; | 	struct ceph_mds_client *mdsc = fsc->mdsc; | ||||||
| 	struct ceph_mds_request *req; | 	struct ceph_mds_request *req; | ||||||
| 	int err; | 	int err; | ||||||
| 
 | 
 | ||||||
| @ -1076,7 +1067,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, | |||||||
| 	struct ceph_inode_info *ci = ceph_inode(inode); | 	struct ceph_inode_info *ci = ceph_inode(inode); | ||||||
| 	int left; | 	int left; | ||||||
| 
 | 
 | ||||||
| 	if (!ceph_test_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) | 	if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) | ||||||
| 		return -EISDIR; | 		return -EISDIR; | ||||||
| 
 | 
 | ||||||
| 	if (!cf->dir_info) { | 	if (!cf->dir_info) { | ||||||
| @ -1177,7 +1168,7 @@ void ceph_dentry_lru_add(struct dentry *dn) | |||||||
| 	dout("dentry_lru_add %p %p '%.*s'\n", di, dn, | 	dout("dentry_lru_add %p %p '%.*s'\n", di, dn, | ||||||
| 	     dn->d_name.len, dn->d_name.name); | 	     dn->d_name.len, dn->d_name.name); | ||||||
| 	if (di) { | 	if (di) { | ||||||
| 		mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; | 		mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; | ||||||
| 		spin_lock(&mdsc->dentry_lru_lock); | 		spin_lock(&mdsc->dentry_lru_lock); | ||||||
| 		list_add_tail(&di->lru, &mdsc->dentry_lru); | 		list_add_tail(&di->lru, &mdsc->dentry_lru); | ||||||
| 		mdsc->num_dentry++; | 		mdsc->num_dentry++; | ||||||
| @ -1193,7 +1184,7 @@ void ceph_dentry_lru_touch(struct dentry *dn) | |||||||
| 	dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn, | 	dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn, | ||||||
| 	     dn->d_name.len, dn->d_name.name, di->offset); | 	     dn->d_name.len, dn->d_name.name, di->offset); | ||||||
| 	if (di) { | 	if (di) { | ||||||
| 		mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; | 		mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; | ||||||
| 		spin_lock(&mdsc->dentry_lru_lock); | 		spin_lock(&mdsc->dentry_lru_lock); | ||||||
| 		list_move_tail(&di->lru, &mdsc->dentry_lru); | 		list_move_tail(&di->lru, &mdsc->dentry_lru); | ||||||
| 		spin_unlock(&mdsc->dentry_lru_lock); | 		spin_unlock(&mdsc->dentry_lru_lock); | ||||||
| @ -1208,7 +1199,7 @@ void ceph_dentry_lru_del(struct dentry *dn) | |||||||
| 	dout("dentry_lru_del %p %p '%.*s'\n", di, dn, | 	dout("dentry_lru_del %p %p '%.*s'\n", di, dn, | ||||||
| 	     dn->d_name.len, dn->d_name.name); | 	     dn->d_name.len, dn->d_name.name); | ||||||
| 	if (di) { | 	if (di) { | ||||||
| 		mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; | 		mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; | ||||||
| 		spin_lock(&mdsc->dentry_lru_lock); | 		spin_lock(&mdsc->dentry_lru_lock); | ||||||
| 		list_del_init(&di->lru); | 		list_del_init(&di->lru); | ||||||
| 		mdsc->num_dentry--; | 		mdsc->num_dentry--; | ||||||
|  | |||||||
| @ -1,10 +1,11 @@ | |||||||
| #include "ceph_debug.h" | #include <linux/ceph/ceph_debug.h> | ||||||
| 
 | 
 | ||||||
| #include <linux/exportfs.h> | #include <linux/exportfs.h> | ||||||
| #include <linux/slab.h> | #include <linux/slab.h> | ||||||
| #include <asm/unaligned.h> | #include <asm/unaligned.h> | ||||||
| 
 | 
 | ||||||
| #include "super.h" | #include "super.h" | ||||||
|  | #include "mds_client.h" | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * NFS export support |  * NFS export support | ||||||
| @ -120,7 +121,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, | |||||||
| static struct dentry *__cfh_to_dentry(struct super_block *sb, | static struct dentry *__cfh_to_dentry(struct super_block *sb, | ||||||
| 				      struct ceph_nfs_confh *cfh) | 				      struct ceph_nfs_confh *cfh) | ||||||
| { | { | ||||||
| 	struct ceph_mds_client *mdsc = &ceph_sb_to_client(sb)->mdsc; | 	struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; | ||||||
| 	struct inode *inode; | 	struct inode *inode; | ||||||
| 	struct dentry *dentry; | 	struct dentry *dentry; | ||||||
| 	struct ceph_vino vino; | 	struct ceph_vino vino; | ||||||
|  | |||||||
							
								
								
									
										207
									
								
								fs/ceph/file.c
									
									
									
									
									
								
							
							
						
						
									
										207
									
								
								fs/ceph/file.c
									
									
									
									
									
								
							| @ -1,5 +1,6 @@ | |||||||
| #include "ceph_debug.h" | #include <linux/ceph/ceph_debug.h> | ||||||
| 
 | 
 | ||||||
|  | #include <linux/module.h> | ||||||
| #include <linux/sched.h> | #include <linux/sched.h> | ||||||
| #include <linux/slab.h> | #include <linux/slab.h> | ||||||
| #include <linux/file.h> | #include <linux/file.h> | ||||||
| @ -38,8 +39,8 @@ | |||||||
| static struct ceph_mds_request * | static struct ceph_mds_request * | ||||||
| prepare_open_request(struct super_block *sb, int flags, int create_mode) | prepare_open_request(struct super_block *sb, int flags, int create_mode) | ||||||
| { | { | ||||||
| 	struct ceph_client *client = ceph_sb_to_client(sb); | 	struct ceph_fs_client *fsc = ceph_sb_to_client(sb); | ||||||
| 	struct ceph_mds_client *mdsc = &client->mdsc; | 	struct ceph_mds_client *mdsc = fsc->mdsc; | ||||||
| 	struct ceph_mds_request *req; | 	struct ceph_mds_request *req; | ||||||
| 	int want_auth = USE_ANY_MDS; | 	int want_auth = USE_ANY_MDS; | ||||||
| 	int op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN; | 	int op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN; | ||||||
| @ -117,8 +118,8 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) | |||||||
| int ceph_open(struct inode *inode, struct file *file) | int ceph_open(struct inode *inode, struct file *file) | ||||||
| { | { | ||||||
| 	struct ceph_inode_info *ci = ceph_inode(inode); | 	struct ceph_inode_info *ci = ceph_inode(inode); | ||||||
| 	struct ceph_client *client = ceph_sb_to_client(inode->i_sb); | 	struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); | ||||||
| 	struct ceph_mds_client *mdsc = &client->mdsc; | 	struct ceph_mds_client *mdsc = fsc->mdsc; | ||||||
| 	struct ceph_mds_request *req; | 	struct ceph_mds_request *req; | ||||||
| 	struct ceph_file_info *cf = file->private_data; | 	struct ceph_file_info *cf = file->private_data; | ||||||
| 	struct inode *parent_inode = file->f_dentry->d_parent->d_inode; | 	struct inode *parent_inode = file->f_dentry->d_parent->d_inode; | ||||||
| @ -216,8 +217,8 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, | |||||||
| 				struct nameidata *nd, int mode, | 				struct nameidata *nd, int mode, | ||||||
| 				int locked_dir) | 				int locked_dir) | ||||||
| { | { | ||||||
| 	struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); | ||||||
| 	struct ceph_mds_client *mdsc = &client->mdsc; | 	struct ceph_mds_client *mdsc = fsc->mdsc; | ||||||
| 	struct file *file = nd->intent.open.file; | 	struct file *file = nd->intent.open.file; | ||||||
| 	struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry); | 	struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry); | ||||||
| 	struct ceph_mds_request *req; | 	struct ceph_mds_request *req; | ||||||
| @ -269,163 +270,6 @@ int ceph_release(struct inode *inode, struct file *file) | |||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /*
 |  | ||||||
|  * build a vector of user pages |  | ||||||
|  */ |  | ||||||
| static struct page **get_direct_page_vector(const char __user *data, |  | ||||||
| 					    int num_pages, |  | ||||||
| 					    loff_t off, size_t len) |  | ||||||
| { |  | ||||||
| 	struct page **pages; |  | ||||||
| 	int rc; |  | ||||||
| 
 |  | ||||||
| 	pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); |  | ||||||
| 	if (!pages) |  | ||||||
| 		return ERR_PTR(-ENOMEM); |  | ||||||
| 
 |  | ||||||
| 	down_read(&current->mm->mmap_sem); |  | ||||||
| 	rc = get_user_pages(current, current->mm, (unsigned long)data, |  | ||||||
| 			    num_pages, 0, 0, pages, NULL); |  | ||||||
| 	up_read(&current->mm->mmap_sem); |  | ||||||
| 	if (rc < 0) |  | ||||||
| 		goto fail; |  | ||||||
| 	return pages; |  | ||||||
| 
 |  | ||||||
| fail: |  | ||||||
| 	kfree(pages); |  | ||||||
| 	return ERR_PTR(rc); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static void put_page_vector(struct page **pages, int num_pages) |  | ||||||
| { |  | ||||||
| 	int i; |  | ||||||
| 
 |  | ||||||
| 	for (i = 0; i < num_pages; i++) |  | ||||||
| 		put_page(pages[i]); |  | ||||||
| 	kfree(pages); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ceph_release_page_vector(struct page **pages, int num_pages) |  | ||||||
| { |  | ||||||
| 	int i; |  | ||||||
| 
 |  | ||||||
| 	for (i = 0; i < num_pages; i++) |  | ||||||
| 		__free_pages(pages[i], 0); |  | ||||||
| 	kfree(pages); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * allocate a vector new pages |  | ||||||
|  */ |  | ||||||
| static struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) |  | ||||||
| { |  | ||||||
| 	struct page **pages; |  | ||||||
| 	int i; |  | ||||||
| 
 |  | ||||||
| 	pages = kmalloc(sizeof(*pages) * num_pages, flags); |  | ||||||
| 	if (!pages) |  | ||||||
| 		return ERR_PTR(-ENOMEM); |  | ||||||
| 	for (i = 0; i < num_pages; i++) { |  | ||||||
| 		pages[i] = __page_cache_alloc(flags); |  | ||||||
| 		if (pages[i] == NULL) { |  | ||||||
| 			ceph_release_page_vector(pages, i); |  | ||||||
| 			return ERR_PTR(-ENOMEM); |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	return pages; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * copy user data into a page vector |  | ||||||
|  */ |  | ||||||
| static int copy_user_to_page_vector(struct page **pages, |  | ||||||
| 				    const char __user *data, |  | ||||||
| 				    loff_t off, size_t len) |  | ||||||
| { |  | ||||||
| 	int i = 0; |  | ||||||
| 	int po = off & ~PAGE_CACHE_MASK; |  | ||||||
| 	int left = len; |  | ||||||
| 	int l, bad; |  | ||||||
| 
 |  | ||||||
| 	while (left > 0) { |  | ||||||
| 		l = min_t(int, PAGE_CACHE_SIZE-po, left); |  | ||||||
| 		bad = copy_from_user(page_address(pages[i]) + po, data, l); |  | ||||||
| 		if (bad == l) |  | ||||||
| 			return -EFAULT; |  | ||||||
| 		data += l - bad; |  | ||||||
| 		left -= l - bad; |  | ||||||
| 		po += l - bad; |  | ||||||
| 		if (po == PAGE_CACHE_SIZE) { |  | ||||||
| 			po = 0; |  | ||||||
| 			i++; |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	return len; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * copy user data from a page vector into a user pointer |  | ||||||
|  */ |  | ||||||
| static int copy_page_vector_to_user(struct page **pages, char __user *data, |  | ||||||
| 				    loff_t off, size_t len) |  | ||||||
| { |  | ||||||
| 	int i = 0; |  | ||||||
| 	int po = off & ~PAGE_CACHE_MASK; |  | ||||||
| 	int left = len; |  | ||||||
| 	int l, bad; |  | ||||||
| 
 |  | ||||||
| 	while (left > 0) { |  | ||||||
| 		l = min_t(int, left, PAGE_CACHE_SIZE-po); |  | ||||||
| 		bad = copy_to_user(data, page_address(pages[i]) + po, l); |  | ||||||
| 		if (bad == l) |  | ||||||
| 			return -EFAULT; |  | ||||||
| 		data += l - bad; |  | ||||||
| 		left -= l - bad; |  | ||||||
| 		if (po) { |  | ||||||
| 			po += l - bad; |  | ||||||
| 			if (po == PAGE_CACHE_SIZE) |  | ||||||
| 				po = 0; |  | ||||||
| 		} |  | ||||||
| 		i++; |  | ||||||
| 	} |  | ||||||
| 	return len; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * Zero an extent within a page vector.  Offset is relative to the |  | ||||||
|  * start of the first page. |  | ||||||
|  */ |  | ||||||
| static void zero_page_vector_range(int off, int len, struct page **pages) |  | ||||||
| { |  | ||||||
| 	int i = off >> PAGE_CACHE_SHIFT; |  | ||||||
| 
 |  | ||||||
| 	off &= ~PAGE_CACHE_MASK; |  | ||||||
| 
 |  | ||||||
| 	dout("zero_page_vector_page %u~%u\n", off, len); |  | ||||||
| 
 |  | ||||||
| 	/* leading partial page? */ |  | ||||||
| 	if (off) { |  | ||||||
| 		int end = min((int)PAGE_CACHE_SIZE, off + len); |  | ||||||
| 		dout("zeroing %d %p head from %d\n", i, pages[i], |  | ||||||
| 		     (int)off); |  | ||||||
| 		zero_user_segment(pages[i], off, end); |  | ||||||
| 		len -= (end - off); |  | ||||||
| 		i++; |  | ||||||
| 	} |  | ||||||
| 	while (len >= PAGE_CACHE_SIZE) { |  | ||||||
| 		dout("zeroing %d %p len=%d\n", i, pages[i], len); |  | ||||||
| 		zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE); |  | ||||||
| 		len -= PAGE_CACHE_SIZE; |  | ||||||
| 		i++; |  | ||||||
| 	} |  | ||||||
| 	/* trailing partial page? */ |  | ||||||
| 	if (len) { |  | ||||||
| 		dout("zeroing %d %p tail to %d\n", i, pages[i], (int)len); |  | ||||||
| 		zero_user_segment(pages[i], 0, len); |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| /*
 | /*
 | ||||||
|  * Read a range of bytes striped over one or more objects.  Iterate over |  * Read a range of bytes striped over one or more objects.  Iterate over | ||||||
|  * objects we stripe over.  (That's not atomic, but good enough for now.) |  * objects we stripe over.  (That's not atomic, but good enough for now.) | ||||||
| @ -438,7 +282,7 @@ static int striped_read(struct inode *inode, | |||||||
| 			struct page **pages, int num_pages, | 			struct page **pages, int num_pages, | ||||||
| 			int *checkeof) | 			int *checkeof) | ||||||
| { | { | ||||||
| 	struct ceph_client *client = ceph_inode_to_client(inode); | 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode); | ||||||
| 	struct ceph_inode_info *ci = ceph_inode(inode); | 	struct ceph_inode_info *ci = ceph_inode(inode); | ||||||
| 	u64 pos, this_len; | 	u64 pos, this_len; | ||||||
| 	int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ | 	int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ | ||||||
| @ -459,7 +303,7 @@ static int striped_read(struct inode *inode, | |||||||
| 
 | 
 | ||||||
| more: | more: | ||||||
| 	this_len = left; | 	this_len = left; | ||||||
| 	ret = ceph_osdc_readpages(&client->osdc, ceph_vino(inode), | 	ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), | ||||||
| 				  &ci->i_layout, pos, &this_len, | 				  &ci->i_layout, pos, &this_len, | ||||||
| 				  ci->i_truncate_seq, | 				  ci->i_truncate_seq, | ||||||
| 				  ci->i_truncate_size, | 				  ci->i_truncate_size, | ||||||
| @ -477,8 +321,8 @@ more: | |||||||
| 
 | 
 | ||||||
| 		if (read < pos - off) { | 		if (read < pos - off) { | ||||||
| 			dout(" zero gap %llu to %llu\n", off + read, pos); | 			dout(" zero gap %llu to %llu\n", off + read, pos); | ||||||
| 			zero_page_vector_range(page_off + read, | 			ceph_zero_page_vector_range(page_off + read, | ||||||
| 					       pos - off - read, pages); | 						    pos - off - read, pages); | ||||||
| 		} | 		} | ||||||
| 		pos += ret; | 		pos += ret; | ||||||
| 		read = pos - off; | 		read = pos - off; | ||||||
| @ -495,8 +339,8 @@ more: | |||||||
| 		/* was original extent fully inside i_size? */ | 		/* was original extent fully inside i_size? */ | ||||||
| 		if (pos + left <= inode->i_size) { | 		if (pos + left <= inode->i_size) { | ||||||
| 			dout("zero tail\n"); | 			dout("zero tail\n"); | ||||||
| 			zero_page_vector_range(page_off + read, len - read, | 			ceph_zero_page_vector_range(page_off + read, len - read, | ||||||
| 					       pages); | 						    pages); | ||||||
| 			read = len; | 			read = len; | ||||||
| 			goto out; | 			goto out; | ||||||
| 		} | 		} | ||||||
| @ -531,7 +375,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, | |||||||
| 	     (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); | 	     (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); | ||||||
| 
 | 
 | ||||||
| 	if (file->f_flags & O_DIRECT) { | 	if (file->f_flags & O_DIRECT) { | ||||||
| 		pages = get_direct_page_vector(data, num_pages, off, len); | 		pages = ceph_get_direct_page_vector(data, num_pages, off, len); | ||||||
| 
 | 
 | ||||||
| 		/*
 | 		/*
 | ||||||
| 		 * flush any page cache pages in this range.  this | 		 * flush any page cache pages in this range.  this | ||||||
| @ -552,13 +396,13 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, | |||||||
| 	ret = striped_read(inode, off, len, pages, num_pages, checkeof); | 	ret = striped_read(inode, off, len, pages, num_pages, checkeof); | ||||||
| 
 | 
 | ||||||
| 	if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) | 	if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) | ||||||
| 		ret = copy_page_vector_to_user(pages, data, off, ret); | 		ret = ceph_copy_page_vector_to_user(pages, data, off, ret); | ||||||
| 	if (ret >= 0) | 	if (ret >= 0) | ||||||
| 		*poff = off + ret; | 		*poff = off + ret; | ||||||
| 
 | 
 | ||||||
| done: | done: | ||||||
| 	if (file->f_flags & O_DIRECT) | 	if (file->f_flags & O_DIRECT) | ||||||
| 		put_page_vector(pages, num_pages); | 		ceph_put_page_vector(pages, num_pages); | ||||||
| 	else | 	else | ||||||
| 		ceph_release_page_vector(pages, num_pages); | 		ceph_release_page_vector(pages, num_pages); | ||||||
| 	dout("sync_read result %d\n", ret); | 	dout("sync_read result %d\n", ret); | ||||||
| @ -594,7 +438,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||||||
| { | { | ||||||
| 	struct inode *inode = file->f_dentry->d_inode; | 	struct inode *inode = file->f_dentry->d_inode; | ||||||
| 	struct ceph_inode_info *ci = ceph_inode(inode); | 	struct ceph_inode_info *ci = ceph_inode(inode); | ||||||
| 	struct ceph_client *client = ceph_inode_to_client(inode); | 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode); | ||||||
| 	struct ceph_osd_request *req; | 	struct ceph_osd_request *req; | ||||||
| 	struct page **pages; | 	struct page **pages; | ||||||
| 	int num_pages; | 	int num_pages; | ||||||
| @ -642,7 +486,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||||||
| 	 */ | 	 */ | ||||||
| more: | more: | ||||||
| 	len = left; | 	len = left; | ||||||
| 	req = ceph_osdc_new_request(&client->osdc, &ci->i_layout, | 	req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, | ||||||
| 				    ceph_vino(inode), pos, &len, | 				    ceph_vino(inode), pos, &len, | ||||||
| 				    CEPH_OSD_OP_WRITE, flags, | 				    CEPH_OSD_OP_WRITE, flags, | ||||||
| 				    ci->i_snap_realm->cached_context, | 				    ci->i_snap_realm->cached_context, | ||||||
| @ -655,7 +499,7 @@ more: | |||||||
| 	num_pages = calc_pages_for(pos, len); | 	num_pages = calc_pages_for(pos, len); | ||||||
| 
 | 
 | ||||||
| 	if (file->f_flags & O_DIRECT) { | 	if (file->f_flags & O_DIRECT) { | ||||||
| 		pages = get_direct_page_vector(data, num_pages, pos, len); | 		pages = ceph_get_direct_page_vector(data, num_pages, pos, len); | ||||||
| 		if (IS_ERR(pages)) { | 		if (IS_ERR(pages)) { | ||||||
| 			ret = PTR_ERR(pages); | 			ret = PTR_ERR(pages); | ||||||
| 			goto out; | 			goto out; | ||||||
| @ -673,7 +517,7 @@ more: | |||||||
| 			ret = PTR_ERR(pages); | 			ret = PTR_ERR(pages); | ||||||
| 			goto out; | 			goto out; | ||||||
| 		} | 		} | ||||||
| 		ret = copy_user_to_page_vector(pages, data, pos, len); | 		ret = ceph_copy_user_to_page_vector(pages, data, pos, len); | ||||||
| 		if (ret < 0) { | 		if (ret < 0) { | ||||||
| 			ceph_release_page_vector(pages, num_pages); | 			ceph_release_page_vector(pages, num_pages); | ||||||
| 			goto out; | 			goto out; | ||||||
| @ -689,7 +533,7 @@ more: | |||||||
| 	req->r_num_pages = num_pages; | 	req->r_num_pages = num_pages; | ||||||
| 	req->r_inode = inode; | 	req->r_inode = inode; | ||||||
| 
 | 
 | ||||||
| 	ret = ceph_osdc_start_request(&client->osdc, req, false); | 	ret = ceph_osdc_start_request(&fsc->client->osdc, req, false); | ||||||
| 	if (!ret) { | 	if (!ret) { | ||||||
| 		if (req->r_safe_callback) { | 		if (req->r_safe_callback) { | ||||||
| 			/*
 | 			/*
 | ||||||
| @ -701,11 +545,11 @@ more: | |||||||
| 			spin_unlock(&ci->i_unsafe_lock); | 			spin_unlock(&ci->i_unsafe_lock); | ||||||
| 			ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); | 			ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); | ||||||
| 		} | 		} | ||||||
| 		ret = ceph_osdc_wait_request(&client->osdc, req); | 		ret = ceph_osdc_wait_request(&fsc->client->osdc, req); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (file->f_flags & O_DIRECT) | 	if (file->f_flags & O_DIRECT) | ||||||
| 		put_page_vector(pages, num_pages); | 		ceph_put_page_vector(pages, num_pages); | ||||||
| 	else if (file->f_flags & O_SYNC) | 	else if (file->f_flags & O_SYNC) | ||||||
| 		ceph_release_page_vector(pages, num_pages); | 		ceph_release_page_vector(pages, num_pages); | ||||||
| 
 | 
 | ||||||
| @ -814,7 +658,8 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||||||
| 	struct ceph_file_info *fi = file->private_data; | 	struct ceph_file_info *fi = file->private_data; | ||||||
| 	struct inode *inode = file->f_dentry->d_inode; | 	struct inode *inode = file->f_dentry->d_inode; | ||||||
| 	struct ceph_inode_info *ci = ceph_inode(inode); | 	struct ceph_inode_info *ci = ceph_inode(inode); | ||||||
| 	struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; | 	struct ceph_osd_client *osdc = | ||||||
|  | 		&ceph_sb_to_client(inode->i_sb)->client->osdc; | ||||||
| 	loff_t endoff = pos + iov->iov_len; | 	loff_t endoff = pos + iov->iov_len; | ||||||
| 	int want, got = 0; | 	int want, got = 0; | ||||||
| 	int ret, err; | 	int ret, err; | ||||||
|  | |||||||
| @ -1,4 +1,4 @@ | |||||||
| #include "ceph_debug.h" | #include <linux/ceph/ceph_debug.h> | ||||||
| 
 | 
 | ||||||
| #include <linux/module.h> | #include <linux/module.h> | ||||||
| #include <linux/fs.h> | #include <linux/fs.h> | ||||||
| @ -13,7 +13,8 @@ | |||||||
| #include <linux/pagevec.h> | #include <linux/pagevec.h> | ||||||
| 
 | 
 | ||||||
| #include "super.h" | #include "super.h" | ||||||
| #include "decode.h" | #include "mds_client.h" | ||||||
|  | #include <linux/ceph/decode.h> | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Ceph inode operations |  * Ceph inode operations | ||||||
| @ -384,7 +385,7 @@ void ceph_destroy_inode(struct inode *inode) | |||||||
| 	 */ | 	 */ | ||||||
| 	if (ci->i_snap_realm) { | 	if (ci->i_snap_realm) { | ||||||
| 		struct ceph_mds_client *mdsc = | 		struct ceph_mds_client *mdsc = | ||||||
| 			&ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; | 			ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; | ||||||
| 		struct ceph_snap_realm *realm = ci->i_snap_realm; | 		struct ceph_snap_realm *realm = ci->i_snap_realm; | ||||||
| 
 | 
 | ||||||
| 		dout(" dropping residual ref to snap realm %p\n", realm); | 		dout(" dropping residual ref to snap realm %p\n", realm); | ||||||
| @ -685,7 +686,7 @@ static int fill_inode(struct inode *inode, | |||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		/* it may be better to set st_size in getattr instead? */ | 		/* it may be better to set st_size in getattr instead? */ | ||||||
| 		if (ceph_test_opt(ceph_sb_to_client(inode->i_sb), RBYTES)) | 		if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), RBYTES)) | ||||||
| 			inode->i_size = ci->i_rbytes; | 			inode->i_size = ci->i_rbytes; | ||||||
| 		break; | 		break; | ||||||
| 	default: | 	default: | ||||||
| @ -901,7 +902,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||||||
| 	struct inode *in = NULL; | 	struct inode *in = NULL; | ||||||
| 	struct ceph_mds_reply_inode *ininfo; | 	struct ceph_mds_reply_inode *ininfo; | ||||||
| 	struct ceph_vino vino; | 	struct ceph_vino vino; | ||||||
| 	struct ceph_client *client = ceph_sb_to_client(sb); | 	struct ceph_fs_client *fsc = ceph_sb_to_client(sb); | ||||||
| 	int i = 0; | 	int i = 0; | ||||||
| 	int err = 0; | 	int err = 0; | ||||||
| 
 | 
 | ||||||
| @ -965,7 +966,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||||||
| 	 */ | 	 */ | ||||||
| 	if (rinfo->head->is_dentry && !req->r_aborted && | 	if (rinfo->head->is_dentry && !req->r_aborted && | ||||||
| 	    (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, | 	    (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, | ||||||
| 					       client->mount_args->snapdir_name, | 					       fsc->mount_options->snapdir_name, | ||||||
| 					       req->r_dentry->d_name.len))) { | 					       req->r_dentry->d_name.len))) { | ||||||
| 		/*
 | 		/*
 | ||||||
| 		 * lookup link rename   : null -> possibly existing inode | 		 * lookup link rename   : null -> possibly existing inode | ||||||
| @ -1533,7 +1534,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||||||
| 	struct inode *parent_inode = dentry->d_parent->d_inode; | 	struct inode *parent_inode = dentry->d_parent->d_inode; | ||||||
| 	const unsigned int ia_valid = attr->ia_valid; | 	const unsigned int ia_valid = attr->ia_valid; | ||||||
| 	struct ceph_mds_request *req; | 	struct ceph_mds_request *req; | ||||||
| 	struct ceph_mds_client *mdsc = &ceph_sb_to_client(dentry->d_sb)->mdsc; | 	struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; | ||||||
| 	int issued; | 	int issued; | ||||||
| 	int release = 0, dirtied = 0; | 	int release = 0, dirtied = 0; | ||||||
| 	int mask = 0; | 	int mask = 0; | ||||||
| @ -1728,8 +1729,8 @@ out: | |||||||
|  */ |  */ | ||||||
| int ceph_do_getattr(struct inode *inode, int mask) | int ceph_do_getattr(struct inode *inode, int mask) | ||||||
| { | { | ||||||
| 	struct ceph_client *client = ceph_sb_to_client(inode->i_sb); | 	struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); | ||||||
| 	struct ceph_mds_client *mdsc = &client->mdsc; | 	struct ceph_mds_client *mdsc = fsc->mdsc; | ||||||
| 	struct ceph_mds_request *req; | 	struct ceph_mds_request *req; | ||||||
| 	int err; | 	int err; | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -1,8 +1,10 @@ | |||||||
| #include <linux/in.h> | #include <linux/in.h> | ||||||
| 
 | 
 | ||||||
| #include "ioctl.h" |  | ||||||
| #include "super.h" | #include "super.h" | ||||||
| #include "ceph_debug.h" | #include "mds_client.h" | ||||||
|  | #include <linux/ceph/ceph_debug.h> | ||||||
|  | 
 | ||||||
|  | #include "ioctl.h" | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
| @ -37,7 +39,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) | |||||||
| { | { | ||||||
| 	struct inode *inode = file->f_dentry->d_inode; | 	struct inode *inode = file->f_dentry->d_inode; | ||||||
| 	struct inode *parent_inode = file->f_dentry->d_parent->d_inode; | 	struct inode *parent_inode = file->f_dentry->d_parent->d_inode; | ||||||
| 	struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; | 	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; | ||||||
| 	struct ceph_mds_request *req; | 	struct ceph_mds_request *req; | ||||||
| 	struct ceph_ioctl_layout l; | 	struct ceph_ioctl_layout l; | ||||||
| 	int err, i; | 	int err, i; | ||||||
| @ -89,6 +91,68 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) | |||||||
| 	return err; | 	return err; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /*
 | ||||||
|  |  * Set a layout policy on a directory inode. All items in the tree | ||||||
|  |  * rooted at this inode will inherit this layout on creation | ||||||
|  |  * (it doesn't apply retroactively), | ||||||
|  |  * unless a subdirectory has its own layout policy. | ||||||
|  |  */ | ||||||
|  | static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg) | ||||||
|  | { | ||||||
|  | 	struct inode *inode = file->f_dentry->d_inode; | ||||||
|  | 	struct ceph_mds_request *req; | ||||||
|  | 	struct ceph_ioctl_layout l; | ||||||
|  | 	int err, i; | ||||||
|  | 	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; | ||||||
|  | 
 | ||||||
|  | 	/* copy and validate */ | ||||||
|  | 	if (copy_from_user(&l, arg, sizeof(l))) | ||||||
|  | 		return -EFAULT; | ||||||
|  | 
 | ||||||
|  | 	if ((l.object_size & ~PAGE_MASK) || | ||||||
|  | 	    (l.stripe_unit & ~PAGE_MASK) || | ||||||
|  | 	    !l.stripe_unit || | ||||||
|  | 	    (l.object_size && | ||||||
|  | 	        (unsigned)l.object_size % (unsigned)l.stripe_unit)) | ||||||
|  | 		return -EINVAL; | ||||||
|  | 
 | ||||||
|  | 	/* make sure it's a valid data pool */ | ||||||
|  | 	if (l.data_pool > 0) { | ||||||
|  | 		mutex_lock(&mdsc->mutex); | ||||||
|  | 		err = -EINVAL; | ||||||
|  | 		for (i = 0; i < mdsc->mdsmap->m_num_data_pg_pools; i++) | ||||||
|  | 			if (mdsc->mdsmap->m_data_pg_pools[i] == l.data_pool) { | ||||||
|  | 				err = 0; | ||||||
|  | 				break; | ||||||
|  | 			} | ||||||
|  | 		mutex_unlock(&mdsc->mutex); | ||||||
|  | 		if (err) | ||||||
|  | 			return err; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETDIRLAYOUT, | ||||||
|  | 				       USE_AUTH_MDS); | ||||||
|  | 
 | ||||||
|  | 	if (IS_ERR(req)) | ||||||
|  | 		return PTR_ERR(req); | ||||||
|  | 	req->r_inode = igrab(inode); | ||||||
|  | 
 | ||||||
|  | 	req->r_args.setlayout.layout.fl_stripe_unit = | ||||||
|  | 			cpu_to_le32(l.stripe_unit); | ||||||
|  | 	req->r_args.setlayout.layout.fl_stripe_count = | ||||||
|  | 			cpu_to_le32(l.stripe_count); | ||||||
|  | 	req->r_args.setlayout.layout.fl_object_size = | ||||||
|  | 			cpu_to_le32(l.object_size); | ||||||
|  | 	req->r_args.setlayout.layout.fl_pg_pool = | ||||||
|  | 			cpu_to_le32(l.data_pool); | ||||||
|  | 	req->r_args.setlayout.layout.fl_pg_preferred = | ||||||
|  | 			cpu_to_le32(l.preferred_osd); | ||||||
|  | 
 | ||||||
|  | 	err = ceph_mdsc_do_request(mdsc, inode, req); | ||||||
|  | 	ceph_mdsc_put_request(req); | ||||||
|  | 	return err; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Return object name, size/offset information, and location (OSD |  * Return object name, size/offset information, and location (OSD | ||||||
|  * number, network address) for a given file offset. |  * number, network address) for a given file offset. | ||||||
| @ -98,7 +162,8 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) | |||||||
| 	struct ceph_ioctl_dataloc dl; | 	struct ceph_ioctl_dataloc dl; | ||||||
| 	struct inode *inode = file->f_dentry->d_inode; | 	struct inode *inode = file->f_dentry->d_inode; | ||||||
| 	struct ceph_inode_info *ci = ceph_inode(inode); | 	struct ceph_inode_info *ci = ceph_inode(inode); | ||||||
| 	struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; | 	struct ceph_osd_client *osdc = | ||||||
|  | 		&ceph_sb_to_client(inode->i_sb)->client->osdc; | ||||||
| 	u64 len = 1, olen; | 	u64 len = 1, olen; | ||||||
| 	u64 tmp; | 	u64 tmp; | ||||||
| 	struct ceph_object_layout ol; | 	struct ceph_object_layout ol; | ||||||
| @ -174,11 +239,15 @@ long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||||||
| 	case CEPH_IOC_SET_LAYOUT: | 	case CEPH_IOC_SET_LAYOUT: | ||||||
| 		return ceph_ioctl_set_layout(file, (void __user *)arg); | 		return ceph_ioctl_set_layout(file, (void __user *)arg); | ||||||
| 
 | 
 | ||||||
|  | 	case CEPH_IOC_SET_LAYOUT_POLICY: | ||||||
|  | 		return ceph_ioctl_set_layout_policy(file, (void __user *)arg); | ||||||
|  | 
 | ||||||
| 	case CEPH_IOC_GET_DATALOC: | 	case CEPH_IOC_GET_DATALOC: | ||||||
| 		return ceph_ioctl_get_dataloc(file, (void __user *)arg); | 		return ceph_ioctl_get_dataloc(file, (void __user *)arg); | ||||||
| 
 | 
 | ||||||
| 	case CEPH_IOC_LAZYIO: | 	case CEPH_IOC_LAZYIO: | ||||||
| 		return ceph_ioctl_lazyio(file); | 		return ceph_ioctl_lazyio(file); | ||||||
| 	} | 	} | ||||||
|  | 
 | ||||||
| 	return -ENOTTY; | 	return -ENOTTY; | ||||||
| } | } | ||||||
|  | |||||||
| @ -4,7 +4,7 @@ | |||||||
| #include <linux/ioctl.h> | #include <linux/ioctl.h> | ||||||
| #include <linux/types.h> | #include <linux/types.h> | ||||||
| 
 | 
 | ||||||
| #define CEPH_IOCTL_MAGIC 0x97 | #define CEPH_IOCTL_MAGIC 0x98 | ||||||
| 
 | 
 | ||||||
| /* just use u64 to align sanely on all archs */ | /* just use u64 to align sanely on all archs */ | ||||||
| struct ceph_ioctl_layout { | struct ceph_ioctl_layout { | ||||||
| @ -17,6 +17,8 @@ struct ceph_ioctl_layout { | |||||||
| 				   struct ceph_ioctl_layout) | 				   struct ceph_ioctl_layout) | ||||||
| #define CEPH_IOC_SET_LAYOUT _IOW(CEPH_IOCTL_MAGIC, 2,		\ | #define CEPH_IOC_SET_LAYOUT _IOW(CEPH_IOCTL_MAGIC, 2,		\ | ||||||
| 				   struct ceph_ioctl_layout) | 				   struct ceph_ioctl_layout) | ||||||
|  | #define CEPH_IOC_SET_LAYOUT_POLICY _IOW(CEPH_IOCTL_MAGIC, 5,	\ | ||||||
|  | 				   struct ceph_ioctl_layout) | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Extract identity, address of the OSD and object storing a given |  * Extract identity, address of the OSD and object storing a given | ||||||
|  | |||||||
| @ -1,11 +1,11 @@ | |||||||
| #include "ceph_debug.h" | #include <linux/ceph/ceph_debug.h> | ||||||
| 
 | 
 | ||||||
| #include <linux/file.h> | #include <linux/file.h> | ||||||
| #include <linux/namei.h> | #include <linux/namei.h> | ||||||
| 
 | 
 | ||||||
| #include "super.h" | #include "super.h" | ||||||
| #include "mds_client.h" | #include "mds_client.h" | ||||||
| #include "pagelist.h" | #include <linux/ceph/pagelist.h> | ||||||
| 
 | 
 | ||||||
| /**
 | /**
 | ||||||
|  * Implement fcntl and flock locking functions. |  * Implement fcntl and flock locking functions. | ||||||
| @ -16,7 +16,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, | |||||||
| { | { | ||||||
| 	struct inode *inode = file->f_dentry->d_inode; | 	struct inode *inode = file->f_dentry->d_inode; | ||||||
| 	struct ceph_mds_client *mdsc = | 	struct ceph_mds_client *mdsc = | ||||||
| 		&ceph_sb_to_client(inode->i_sb)->mdsc; | 		ceph_sb_to_client(inode->i_sb)->mdsc; | ||||||
| 	struct ceph_mds_request *req; | 	struct ceph_mds_request *req; | ||||||
| 	int err; | 	int err; | ||||||
| 
 | 
 | ||||||
| @ -181,8 +181,9 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) | |||||||
|  * Encode the flock and fcntl locks for the given inode into the pagelist. |  * Encode the flock and fcntl locks for the given inode into the pagelist. | ||||||
|  * Format is: #fcntl locks, sequential fcntl locks, #flock locks, |  * Format is: #fcntl locks, sequential fcntl locks, #flock locks, | ||||||
|  * sequential flock locks. |  * sequential flock locks. | ||||||
|  * Must be called with BLK already held, and the lock numbers should have |  * Must be called with lock_flocks() already held. | ||||||
|  * been gathered under the same lock holding window. |  * If we encounter more of a specific lock type than expected, | ||||||
|  |  * we return -ENOSPC. | ||||||
|  */ |  */ | ||||||
| int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, | int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, | ||||||
| 		      int num_fcntl_locks, int num_flock_locks) | 		      int num_fcntl_locks, int num_flock_locks) | ||||||
| @ -190,6 +191,8 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, | |||||||
| 	struct file_lock *lock; | 	struct file_lock *lock; | ||||||
| 	struct ceph_filelock cephlock; | 	struct ceph_filelock cephlock; | ||||||
| 	int err = 0; | 	int err = 0; | ||||||
|  | 	int seen_fcntl = 0; | ||||||
|  | 	int seen_flock = 0; | ||||||
| 
 | 
 | ||||||
| 	dout("encoding %d flock and %d fcntl locks", num_flock_locks, | 	dout("encoding %d flock and %d fcntl locks", num_flock_locks, | ||||||
| 	     num_fcntl_locks); | 	     num_fcntl_locks); | ||||||
| @ -198,6 +201,11 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, | |||||||
| 		goto fail; | 		goto fail; | ||||||
| 	for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | 	for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | ||||||
| 		if (lock->fl_flags & FL_POSIX) { | 		if (lock->fl_flags & FL_POSIX) { | ||||||
|  | 			++seen_fcntl; | ||||||
|  | 			if (seen_fcntl > num_fcntl_locks) { | ||||||
|  | 				err = -ENOSPC; | ||||||
|  | 				goto fail; | ||||||
|  | 			} | ||||||
| 			err = lock_to_ceph_filelock(lock, &cephlock); | 			err = lock_to_ceph_filelock(lock, &cephlock); | ||||||
| 			if (err) | 			if (err) | ||||||
| 				goto fail; | 				goto fail; | ||||||
| @ -213,6 +221,11 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, | |||||||
| 		goto fail; | 		goto fail; | ||||||
| 	for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | 	for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | ||||||
| 		if (lock->fl_flags & FL_FLOCK) { | 		if (lock->fl_flags & FL_FLOCK) { | ||||||
|  | 			++seen_flock; | ||||||
|  | 			if (seen_flock > num_flock_locks) { | ||||||
|  | 				err = -ENOSPC; | ||||||
|  | 				goto fail; | ||||||
|  | 			} | ||||||
| 			err = lock_to_ceph_filelock(lock, &cephlock); | 			err = lock_to_ceph_filelock(lock, &cephlock); | ||||||
| 			if (err) | 			if (err) | ||||||
| 				goto fail; | 				goto fail; | ||||||
|  | |||||||
| @ -1,17 +1,21 @@ | |||||||
| #include "ceph_debug.h" | #include <linux/ceph/ceph_debug.h> | ||||||
| 
 | 
 | ||||||
|  | #include <linux/fs.h> | ||||||
| #include <linux/wait.h> | #include <linux/wait.h> | ||||||
| #include <linux/slab.h> | #include <linux/slab.h> | ||||||
| #include <linux/sched.h> | #include <linux/sched.h> | ||||||
|  | #include <linux/debugfs.h> | ||||||
|  | #include <linux/seq_file.h> | ||||||
| #include <linux/smp_lock.h> | #include <linux/smp_lock.h> | ||||||
| 
 | 
 | ||||||
| #include "mds_client.h" |  | ||||||
| #include "mon_client.h" |  | ||||||
| #include "super.h" | #include "super.h" | ||||||
| #include "messenger.h" | #include "mds_client.h" | ||||||
| #include "decode.h" | 
 | ||||||
| #include "auth.h" | #include <linux/ceph/messenger.h> | ||||||
| #include "pagelist.h" | #include <linux/ceph/decode.h> | ||||||
|  | #include <linux/ceph/pagelist.h> | ||||||
|  | #include <linux/ceph/auth.h> | ||||||
|  | #include <linux/ceph/debugfs.h> | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * A cluster of MDS (metadata server) daemons is responsible for |  * A cluster of MDS (metadata server) daemons is responsible for | ||||||
| @ -286,8 +290,9 @@ void ceph_put_mds_session(struct ceph_mds_session *s) | |||||||
| 	     atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1); | 	     atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1); | ||||||
| 	if (atomic_dec_and_test(&s->s_ref)) { | 	if (atomic_dec_and_test(&s->s_ref)) { | ||||||
| 		if (s->s_authorizer) | 		if (s->s_authorizer) | ||||||
| 			s->s_mdsc->client->monc.auth->ops->destroy_authorizer( | 		     s->s_mdsc->fsc->client->monc.auth->ops->destroy_authorizer( | ||||||
| 				s->s_mdsc->client->monc.auth, s->s_authorizer); | 			     s->s_mdsc->fsc->client->monc.auth, | ||||||
|  | 			     s->s_authorizer); | ||||||
| 		kfree(s); | 		kfree(s); | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| @ -344,7 +349,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, | |||||||
| 	s->s_seq = 0; | 	s->s_seq = 0; | ||||||
| 	mutex_init(&s->s_mutex); | 	mutex_init(&s->s_mutex); | ||||||
| 
 | 
 | ||||||
| 	ceph_con_init(mdsc->client->msgr, &s->s_con); | 	ceph_con_init(mdsc->fsc->client->msgr, &s->s_con); | ||||||
| 	s->s_con.private = s; | 	s->s_con.private = s; | ||||||
| 	s->s_con.ops = &mds_con_ops; | 	s->s_con.ops = &mds_con_ops; | ||||||
| 	s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS; | 	s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS; | ||||||
| @ -599,7 +604,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, | |||||||
| 	} else if (req->r_dentry) { | 	} else if (req->r_dentry) { | ||||||
| 		struct inode *dir = req->r_dentry->d_parent->d_inode; | 		struct inode *dir = req->r_dentry->d_parent->d_inode; | ||||||
| 
 | 
 | ||||||
| 		if (dir->i_sb != mdsc->client->sb) { | 		if (dir->i_sb != mdsc->fsc->sb) { | ||||||
| 			/* not this fs! */ | 			/* not this fs! */ | ||||||
| 			inode = req->r_dentry->d_inode; | 			inode = req->r_dentry->d_inode; | ||||||
| 		} else if (ceph_snap(dir) != CEPH_NOSNAP) { | 		} else if (ceph_snap(dir) != CEPH_NOSNAP) { | ||||||
| @ -884,7 +889,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||||||
| 	__ceph_remove_cap(cap); | 	__ceph_remove_cap(cap); | ||||||
| 	if (!__ceph_is_any_real_caps(ci)) { | 	if (!__ceph_is_any_real_caps(ci)) { | ||||||
| 		struct ceph_mds_client *mdsc = | 		struct ceph_mds_client *mdsc = | ||||||
| 			&ceph_sb_to_client(inode->i_sb)->mdsc; | 			ceph_sb_to_client(inode->i_sb)->mdsc; | ||||||
| 
 | 
 | ||||||
| 		spin_lock(&mdsc->cap_dirty_lock); | 		spin_lock(&mdsc->cap_dirty_lock); | ||||||
| 		if (!list_empty(&ci->i_dirty_item)) { | 		if (!list_empty(&ci->i_dirty_item)) { | ||||||
| @ -1146,7 +1151,7 @@ int ceph_add_cap_releases(struct ceph_mds_client *mdsc, | |||||||
| 	struct ceph_msg *msg, *partial = NULL; | 	struct ceph_msg *msg, *partial = NULL; | ||||||
| 	struct ceph_mds_cap_release *head; | 	struct ceph_mds_cap_release *head; | ||||||
| 	int err = -ENOMEM; | 	int err = -ENOMEM; | ||||||
| 	int extra = mdsc->client->mount_args->cap_release_safety; | 	int extra = mdsc->fsc->mount_options->cap_release_safety; | ||||||
| 	int num; | 	int num; | ||||||
| 
 | 
 | ||||||
| 	dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds, | 	dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds, | ||||||
| @ -2085,7 +2090,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||||||
| 
 | 
 | ||||||
| 	/* insert trace into our cache */ | 	/* insert trace into our cache */ | ||||||
| 	mutex_lock(&req->r_fill_mutex); | 	mutex_lock(&req->r_fill_mutex); | ||||||
| 	err = ceph_fill_trace(mdsc->client->sb, req, req->r_session); | 	err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); | ||||||
| 	if (err == 0) { | 	if (err == 0) { | ||||||
| 		if (result == 0 && rinfo->dir_nr) | 		if (result == 0 && rinfo->dir_nr) | ||||||
| 			ceph_readdir_prepopulate(req, req->r_session); | 			ceph_readdir_prepopulate(req, req->r_session); | ||||||
| @ -2361,19 +2366,35 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||||||
| 
 | 
 | ||||||
| 	if (recon_state->flock) { | 	if (recon_state->flock) { | ||||||
| 		int num_fcntl_locks, num_flock_locks; | 		int num_fcntl_locks, num_flock_locks; | ||||||
|  | 		struct ceph_pagelist_cursor trunc_point; | ||||||
| 
 | 
 | ||||||
| 		lock_kernel(); | 		ceph_pagelist_set_cursor(pagelist, &trunc_point); | ||||||
| 		ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); | 		do { | ||||||
| 		rec.v2.flock_len = (2*sizeof(u32) + | 			lock_flocks(); | ||||||
| 				    (num_fcntl_locks+num_flock_locks) * | 			ceph_count_locks(inode, &num_fcntl_locks, | ||||||
| 				    sizeof(struct ceph_filelock)); | 					 &num_flock_locks); | ||||||
|  | 			rec.v2.flock_len = (2*sizeof(u32) + | ||||||
|  | 					    (num_fcntl_locks+num_flock_locks) * | ||||||
|  | 					    sizeof(struct ceph_filelock)); | ||||||
|  | 			unlock_flocks(); | ||||||
| 
 | 
 | ||||||
| 		err = ceph_pagelist_append(pagelist, &rec, reclen); | 			/* pre-alloc pagelist */ | ||||||
| 		if (!err) | 			ceph_pagelist_truncate(pagelist, &trunc_point); | ||||||
| 			err = ceph_encode_locks(inode, pagelist, | 			err = ceph_pagelist_append(pagelist, &rec, reclen); | ||||||
| 						num_fcntl_locks, | 			if (!err) | ||||||
| 						num_flock_locks); | 				err = ceph_pagelist_reserve(pagelist, | ||||||
| 		unlock_kernel(); | 							    rec.v2.flock_len); | ||||||
|  | 
 | ||||||
|  | 			/* encode locks */ | ||||||
|  | 			if (!err) { | ||||||
|  | 				lock_flocks(); | ||||||
|  | 				err = ceph_encode_locks(inode, | ||||||
|  | 							pagelist, | ||||||
|  | 							num_fcntl_locks, | ||||||
|  | 							num_flock_locks); | ||||||
|  | 				unlock_flocks(); | ||||||
|  | 			} | ||||||
|  | 		} while (err == -ENOSPC); | ||||||
| 	} else { | 	} else { | ||||||
| 		err = ceph_pagelist_append(pagelist, &rec, reclen); | 		err = ceph_pagelist_append(pagelist, &rec, reclen); | ||||||
| 	} | 	} | ||||||
| @ -2613,7 +2634,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, | |||||||
| 			 struct ceph_mds_session *session, | 			 struct ceph_mds_session *session, | ||||||
| 			 struct ceph_msg *msg) | 			 struct ceph_msg *msg) | ||||||
| { | { | ||||||
| 	struct super_block *sb = mdsc->client->sb; | 	struct super_block *sb = mdsc->fsc->sb; | ||||||
| 	struct inode *inode; | 	struct inode *inode; | ||||||
| 	struct ceph_inode_info *ci; | 	struct ceph_inode_info *ci; | ||||||
| 	struct dentry *parent, *dentry; | 	struct dentry *parent, *dentry; | ||||||
| @ -2891,10 +2912,16 @@ static void delayed_work(struct work_struct *work) | |||||||
| 	schedule_delayed(mdsc); | 	schedule_delayed(mdsc); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | int ceph_mdsc_init(struct ceph_fs_client *fsc) | ||||||
| 
 | 
 | ||||||
| int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) |  | ||||||
| { | { | ||||||
| 	mdsc->client = client; | 	struct ceph_mds_client *mdsc; | ||||||
|  | 
 | ||||||
|  | 	mdsc = kzalloc(sizeof(struct ceph_mds_client), GFP_NOFS); | ||||||
|  | 	if (!mdsc) | ||||||
|  | 		return -ENOMEM; | ||||||
|  | 	mdsc->fsc = fsc; | ||||||
|  | 	fsc->mdsc = mdsc; | ||||||
| 	mutex_init(&mdsc->mutex); | 	mutex_init(&mdsc->mutex); | ||||||
| 	mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); | 	mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); | ||||||
| 	if (mdsc->mdsmap == NULL) | 	if (mdsc->mdsmap == NULL) | ||||||
| @ -2927,7 +2954,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | |||||||
| 	INIT_LIST_HEAD(&mdsc->dentry_lru); | 	INIT_LIST_HEAD(&mdsc->dentry_lru); | ||||||
| 
 | 
 | ||||||
| 	ceph_caps_init(mdsc); | 	ceph_caps_init(mdsc); | ||||||
| 	ceph_adjust_min_caps(mdsc, client->min_caps); | 	ceph_adjust_min_caps(mdsc, fsc->min_caps); | ||||||
| 
 | 
 | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| @ -2939,7 +2966,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | |||||||
| static void wait_requests(struct ceph_mds_client *mdsc) | static void wait_requests(struct ceph_mds_client *mdsc) | ||||||
| { | { | ||||||
| 	struct ceph_mds_request *req; | 	struct ceph_mds_request *req; | ||||||
| 	struct ceph_client *client = mdsc->client; | 	struct ceph_fs_client *fsc = mdsc->fsc; | ||||||
| 
 | 
 | ||||||
| 	mutex_lock(&mdsc->mutex); | 	mutex_lock(&mdsc->mutex); | ||||||
| 	if (__get_oldest_req(mdsc)) { | 	if (__get_oldest_req(mdsc)) { | ||||||
| @ -2947,7 +2974,7 @@ static void wait_requests(struct ceph_mds_client *mdsc) | |||||||
| 
 | 
 | ||||||
| 		dout("wait_requests waiting for requests\n"); | 		dout("wait_requests waiting for requests\n"); | ||||||
| 		wait_for_completion_timeout(&mdsc->safe_umount_waiters, | 		wait_for_completion_timeout(&mdsc->safe_umount_waiters, | ||||||
| 				    client->mount_args->mount_timeout * HZ); | 				    fsc->client->options->mount_timeout * HZ); | ||||||
| 
 | 
 | ||||||
| 		/* tear down remaining requests */ | 		/* tear down remaining requests */ | ||||||
| 		mutex_lock(&mdsc->mutex); | 		mutex_lock(&mdsc->mutex); | ||||||
| @ -3030,7 +3057,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) | |||||||
| { | { | ||||||
| 	u64 want_tid, want_flush; | 	u64 want_tid, want_flush; | ||||||
| 
 | 
 | ||||||
| 	if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN) | 	if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) | ||||||
| 		return; | 		return; | ||||||
| 
 | 
 | ||||||
| 	dout("sync\n"); | 	dout("sync\n"); | ||||||
| @ -3053,7 +3080,7 @@ bool done_closing_sessions(struct ceph_mds_client *mdsc) | |||||||
| { | { | ||||||
| 	int i, n = 0; | 	int i, n = 0; | ||||||
| 
 | 
 | ||||||
| 	if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN) | 	if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) | ||||||
| 		return true; | 		return true; | ||||||
| 
 | 
 | ||||||
| 	mutex_lock(&mdsc->mutex); | 	mutex_lock(&mdsc->mutex); | ||||||
| @ -3071,8 +3098,8 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) | |||||||
| { | { | ||||||
| 	struct ceph_mds_session *session; | 	struct ceph_mds_session *session; | ||||||
| 	int i; | 	int i; | ||||||
| 	struct ceph_client *client = mdsc->client; | 	struct ceph_fs_client *fsc = mdsc->fsc; | ||||||
| 	unsigned long timeout = client->mount_args->mount_timeout * HZ; | 	unsigned long timeout = fsc->client->options->mount_timeout * HZ; | ||||||
| 
 | 
 | ||||||
| 	dout("close_sessions\n"); | 	dout("close_sessions\n"); | ||||||
| 
 | 
 | ||||||
| @ -3119,7 +3146,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) | |||||||
| 	dout("stopped\n"); | 	dout("stopped\n"); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void ceph_mdsc_stop(struct ceph_mds_client *mdsc) | static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) | ||||||
| { | { | ||||||
| 	dout("stop\n"); | 	dout("stop\n"); | ||||||
| 	cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ | 	cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ | ||||||
| @ -3129,6 +3156,15 @@ void ceph_mdsc_stop(struct ceph_mds_client *mdsc) | |||||||
| 	ceph_caps_finalize(mdsc); | 	ceph_caps_finalize(mdsc); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void ceph_mdsc_destroy(struct ceph_fs_client *fsc) | ||||||
|  | { | ||||||
|  | 	struct ceph_mds_client *mdsc = fsc->mdsc; | ||||||
|  | 
 | ||||||
|  | 	ceph_mdsc_stop(mdsc); | ||||||
|  | 	fsc->mdsc = NULL; | ||||||
|  | 	kfree(mdsc); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * handle mds map update. |  * handle mds map update. | ||||||
| @ -3145,14 +3181,14 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg) | |||||||
| 
 | 
 | ||||||
| 	ceph_decode_need(&p, end, sizeof(fsid)+2*sizeof(u32), bad); | 	ceph_decode_need(&p, end, sizeof(fsid)+2*sizeof(u32), bad); | ||||||
| 	ceph_decode_copy(&p, &fsid, sizeof(fsid)); | 	ceph_decode_copy(&p, &fsid, sizeof(fsid)); | ||||||
| 	if (ceph_check_fsid(mdsc->client, &fsid) < 0) | 	if (ceph_check_fsid(mdsc->fsc->client, &fsid) < 0) | ||||||
| 		return; | 		return; | ||||||
| 	epoch = ceph_decode_32(&p); | 	epoch = ceph_decode_32(&p); | ||||||
| 	maplen = ceph_decode_32(&p); | 	maplen = ceph_decode_32(&p); | ||||||
| 	dout("handle_map epoch %u len %d\n", epoch, (int)maplen); | 	dout("handle_map epoch %u len %d\n", epoch, (int)maplen); | ||||||
| 
 | 
 | ||||||
| 	/* do we need it? */ | 	/* do we need it? */ | ||||||
| 	ceph_monc_got_mdsmap(&mdsc->client->monc, epoch); | 	ceph_monc_got_mdsmap(&mdsc->fsc->client->monc, epoch); | ||||||
| 	mutex_lock(&mdsc->mutex); | 	mutex_lock(&mdsc->mutex); | ||||||
| 	if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) { | 	if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) { | ||||||
| 		dout("handle_map epoch %u <= our %u\n", | 		dout("handle_map epoch %u <= our %u\n", | ||||||
| @ -3176,7 +3212,7 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg) | |||||||
| 	} else { | 	} else { | ||||||
| 		mdsc->mdsmap = newmap;  /* first mds map */ | 		mdsc->mdsmap = newmap;  /* first mds map */ | ||||||
| 	} | 	} | ||||||
| 	mdsc->client->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size; | 	mdsc->fsc->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size; | ||||||
| 
 | 
 | ||||||
| 	__wake_requests(mdsc, &mdsc->waiting_for_map); | 	__wake_requests(mdsc, &mdsc->waiting_for_map); | ||||||
| 
 | 
 | ||||||
| @ -3277,7 +3313,7 @@ static int get_authorizer(struct ceph_connection *con, | |||||||
| { | { | ||||||
| 	struct ceph_mds_session *s = con->private; | 	struct ceph_mds_session *s = con->private; | ||||||
| 	struct ceph_mds_client *mdsc = s->s_mdsc; | 	struct ceph_mds_client *mdsc = s->s_mdsc; | ||||||
| 	struct ceph_auth_client *ac = mdsc->client->monc.auth; | 	struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; | ||||||
| 	int ret = 0; | 	int ret = 0; | ||||||
| 
 | 
 | ||||||
| 	if (force_new && s->s_authorizer) { | 	if (force_new && s->s_authorizer) { | ||||||
| @ -3311,7 +3347,7 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len) | |||||||
| { | { | ||||||
| 	struct ceph_mds_session *s = con->private; | 	struct ceph_mds_session *s = con->private; | ||||||
| 	struct ceph_mds_client *mdsc = s->s_mdsc; | 	struct ceph_mds_client *mdsc = s->s_mdsc; | ||||||
| 	struct ceph_auth_client *ac = mdsc->client->monc.auth; | 	struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; | ||||||
| 
 | 
 | ||||||
| 	return ac->ops->verify_authorizer_reply(ac, s->s_authorizer, len); | 	return ac->ops->verify_authorizer_reply(ac, s->s_authorizer, len); | ||||||
| } | } | ||||||
| @ -3320,12 +3356,12 @@ static int invalidate_authorizer(struct ceph_connection *con) | |||||||
| { | { | ||||||
| 	struct ceph_mds_session *s = con->private; | 	struct ceph_mds_session *s = con->private; | ||||||
| 	struct ceph_mds_client *mdsc = s->s_mdsc; | 	struct ceph_mds_client *mdsc = s->s_mdsc; | ||||||
| 	struct ceph_auth_client *ac = mdsc->client->monc.auth; | 	struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; | ||||||
| 
 | 
 | ||||||
| 	if (ac->ops->invalidate_authorizer) | 	if (ac->ops->invalidate_authorizer) | ||||||
| 		ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS); | 		ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS); | ||||||
| 
 | 
 | ||||||
| 	return ceph_monc_validate_auth(&mdsc->client->monc); | 	return ceph_monc_validate_auth(&mdsc->fsc->client->monc); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static const struct ceph_connection_operations mds_con_ops = { | static const struct ceph_connection_operations mds_con_ops = { | ||||||
| @ -3338,7 +3374,4 @@ static const struct ceph_connection_operations mds_con_ops = { | |||||||
| 	.peer_reset = peer_reset, | 	.peer_reset = peer_reset, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| /* eof */ | /* eof */ | ||||||
|  | |||||||
| @ -8,9 +8,9 @@ | |||||||
| #include <linux/rbtree.h> | #include <linux/rbtree.h> | ||||||
| #include <linux/spinlock.h> | #include <linux/spinlock.h> | ||||||
| 
 | 
 | ||||||
| #include "types.h" | #include <linux/ceph/types.h> | ||||||
| #include "messenger.h" | #include <linux/ceph/messenger.h> | ||||||
| #include "mdsmap.h" | #include <linux/ceph/mdsmap.h> | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Some lock dependencies: |  * Some lock dependencies: | ||||||
| @ -26,7 +26,7 @@ | |||||||
|  * |  * | ||||||
|  */ |  */ | ||||||
| 
 | 
 | ||||||
| struct ceph_client; | struct ceph_fs_client; | ||||||
| struct ceph_cap; | struct ceph_cap; | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
| @ -230,7 +230,7 @@ struct ceph_mds_request { | |||||||
|  * mds client state |  * mds client state | ||||||
|  */ |  */ | ||||||
| struct ceph_mds_client { | struct ceph_mds_client { | ||||||
| 	struct ceph_client      *client; | 	struct ceph_fs_client  *fsc; | ||||||
| 	struct mutex            mutex;         /* all nested structures */ | 	struct mutex            mutex;         /* all nested structures */ | ||||||
| 
 | 
 | ||||||
| 	struct ceph_mdsmap      *mdsmap; | 	struct ceph_mdsmap      *mdsmap; | ||||||
| @ -289,11 +289,6 @@ struct ceph_mds_client { | |||||||
| 	int		caps_avail_count;    /* unused, unreserved */ | 	int		caps_avail_count;    /* unused, unreserved */ | ||||||
| 	int		caps_min_count;      /* keep at least this many
 | 	int		caps_min_count;      /* keep at least this many
 | ||||||
| 						(unreserved) */ | 						(unreserved) */ | ||||||
| 
 |  | ||||||
| #ifdef CONFIG_DEBUG_FS |  | ||||||
| 	struct dentry 	  *debugfs_file; |  | ||||||
| #endif |  | ||||||
| 
 |  | ||||||
| 	spinlock_t	  dentry_lru_lock; | 	spinlock_t	  dentry_lru_lock; | ||||||
| 	struct list_head  dentry_lru; | 	struct list_head  dentry_lru; | ||||||
| 	int		  num_dentry; | 	int		  num_dentry; | ||||||
| @ -316,10 +311,9 @@ extern void ceph_put_mds_session(struct ceph_mds_session *s); | |||||||
| extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc, | extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc, | ||||||
| 			     struct ceph_msg *msg, int mds); | 			     struct ceph_msg *msg, int mds); | ||||||
| 
 | 
 | ||||||
| extern int ceph_mdsc_init(struct ceph_mds_client *mdsc, | extern int ceph_mdsc_init(struct ceph_fs_client *fsc); | ||||||
| 			   struct ceph_client *client); |  | ||||||
| extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc); | extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc); | ||||||
| extern void ceph_mdsc_stop(struct ceph_mds_client *mdsc); | extern void ceph_mdsc_destroy(struct ceph_fs_client *fsc); | ||||||
| 
 | 
 | ||||||
| extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc); | extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc); | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -1,4 +1,4 @@ | |||||||
| #include "ceph_debug.h" | #include <linux/ceph/ceph_debug.h> | ||||||
| 
 | 
 | ||||||
| #include <linux/bug.h> | #include <linux/bug.h> | ||||||
| #include <linux/err.h> | #include <linux/err.h> | ||||||
| @ -6,9 +6,9 @@ | |||||||
| #include <linux/slab.h> | #include <linux/slab.h> | ||||||
| #include <linux/types.h> | #include <linux/types.h> | ||||||
| 
 | 
 | ||||||
| #include "mdsmap.h" | #include <linux/ceph/mdsmap.h> | ||||||
| #include "messenger.h" | #include <linux/ceph/messenger.h> | ||||||
| #include "decode.h" | #include <linux/ceph/decode.h> | ||||||
| 
 | 
 | ||||||
| #include "super.h" | #include "super.h" | ||||||
| 
 | 
 | ||||||
| @ -117,7 +117,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n", | 		dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n", | ||||||
| 		     i+1, n, global_id, mds, inc, pr_addr(&addr.in_addr), | 		     i+1, n, global_id, mds, inc, | ||||||
|  | 		     ceph_pr_addr(&addr.in_addr), | ||||||
| 		     ceph_mds_state_name(state)); | 		     ceph_mds_state_name(state)); | ||||||
| 		if (mds >= 0 && mds < m->m_max_mds && state > 0) { | 		if (mds >= 0 && mds < m->m_max_mds && state > 0) { | ||||||
| 			m->m_info[mds].global_id = global_id; | 			m->m_info[mds].global_id = global_id; | ||||||
|  | |||||||
| @ -1,63 +0,0 @@ | |||||||
| 
 |  | ||||||
| #include <linux/gfp.h> |  | ||||||
| #include <linux/pagemap.h> |  | ||||||
| #include <linux/highmem.h> |  | ||||||
| 
 |  | ||||||
| #include "pagelist.h" |  | ||||||
| 
 |  | ||||||
| static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl) |  | ||||||
| { |  | ||||||
| 	struct page *page = list_entry(pl->head.prev, struct page, |  | ||||||
| 				       lru); |  | ||||||
| 	kunmap(page); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| int ceph_pagelist_release(struct ceph_pagelist *pl) |  | ||||||
| { |  | ||||||
| 	if (pl->mapped_tail) |  | ||||||
| 		ceph_pagelist_unmap_tail(pl); |  | ||||||
| 
 |  | ||||||
| 	while (!list_empty(&pl->head)) { |  | ||||||
| 		struct page *page = list_first_entry(&pl->head, struct page, |  | ||||||
| 						     lru); |  | ||||||
| 		list_del(&page->lru); |  | ||||||
| 		__free_page(page); |  | ||||||
| 	} |  | ||||||
| 	return 0; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static int ceph_pagelist_addpage(struct ceph_pagelist *pl) |  | ||||||
| { |  | ||||||
| 	struct page *page = __page_cache_alloc(GFP_NOFS); |  | ||||||
| 	if (!page) |  | ||||||
| 		return -ENOMEM; |  | ||||||
| 	pl->room += PAGE_SIZE; |  | ||||||
| 	list_add_tail(&page->lru, &pl->head); |  | ||||||
| 	if (pl->mapped_tail) |  | ||||||
| 		ceph_pagelist_unmap_tail(pl); |  | ||||||
| 	pl->mapped_tail = kmap(page); |  | ||||||
| 	return 0; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| int ceph_pagelist_append(struct ceph_pagelist *pl, void *buf, size_t len) |  | ||||||
| { |  | ||||||
| 	while (pl->room < len) { |  | ||||||
| 		size_t bit = pl->room; |  | ||||||
| 		int ret; |  | ||||||
| 
 |  | ||||||
| 		memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), |  | ||||||
| 		       buf, bit); |  | ||||||
| 		pl->length += bit; |  | ||||||
| 		pl->room -= bit; |  | ||||||
| 		buf += bit; |  | ||||||
| 		len -= bit; |  | ||||||
| 		ret = ceph_pagelist_addpage(pl); |  | ||||||
| 		if (ret) |  | ||||||
| 			return ret; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), buf, len); |  | ||||||
| 	pl->length += len; |  | ||||||
| 	pl->room -= len; |  | ||||||
| 	return 0; |  | ||||||
| } |  | ||||||
| @ -1,10 +1,12 @@ | |||||||
| #include "ceph_debug.h" | #include <linux/ceph/ceph_debug.h> | ||||||
| 
 | 
 | ||||||
| #include <linux/sort.h> | #include <linux/sort.h> | ||||||
| #include <linux/slab.h> | #include <linux/slab.h> | ||||||
| 
 | 
 | ||||||
| #include "super.h" | #include "super.h" | ||||||
| #include "decode.h" | #include "mds_client.h" | ||||||
|  | 
 | ||||||
|  | #include <linux/ceph/decode.h> | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Snapshots in ceph are driven in large part by cooperation from the |  * Snapshots in ceph are driven in large part by cooperation from the | ||||||
| @ -526,7 +528,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, | |||||||
| 			    struct ceph_cap_snap *capsnap) | 			    struct ceph_cap_snap *capsnap) | ||||||
| { | { | ||||||
| 	struct inode *inode = &ci->vfs_inode; | 	struct inode *inode = &ci->vfs_inode; | ||||||
| 	struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; | 	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; | ||||||
| 
 | 
 | ||||||
| 	BUG_ON(capsnap->writing); | 	BUG_ON(capsnap->writing); | ||||||
| 	capsnap->size = inode->i_size; | 	capsnap->size = inode->i_size; | ||||||
| @ -747,7 +749,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, | |||||||
| 		      struct ceph_mds_session *session, | 		      struct ceph_mds_session *session, | ||||||
| 		      struct ceph_msg *msg) | 		      struct ceph_msg *msg) | ||||||
| { | { | ||||||
| 	struct super_block *sb = mdsc->client->sb; | 	struct super_block *sb = mdsc->fsc->sb; | ||||||
| 	int mds = session->s_mds; | 	int mds = session->s_mds; | ||||||
| 	u64 split; | 	u64 split; | ||||||
| 	int op; | 	int op; | ||||||
|  | |||||||
| @ -1,71 +1,9 @@ | |||||||
| /*
 | /*
 | ||||||
|  * Ceph string constants |  * Ceph fs string constants | ||||||
|  */ |  */ | ||||||
| #include "types.h" | #include <linux/module.h> | ||||||
|  | #include <linux/ceph/types.h> | ||||||
| 
 | 
 | ||||||
| const char *ceph_entity_type_name(int type) |  | ||||||
| { |  | ||||||
| 	switch (type) { |  | ||||||
| 	case CEPH_ENTITY_TYPE_MDS: return "mds"; |  | ||||||
| 	case CEPH_ENTITY_TYPE_OSD: return "osd"; |  | ||||||
| 	case CEPH_ENTITY_TYPE_MON: return "mon"; |  | ||||||
| 	case CEPH_ENTITY_TYPE_CLIENT: return "client"; |  | ||||||
| 	case CEPH_ENTITY_TYPE_AUTH: return "auth"; |  | ||||||
| 	default: return "unknown"; |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| const char *ceph_osd_op_name(int op) |  | ||||||
| { |  | ||||||
| 	switch (op) { |  | ||||||
| 	case CEPH_OSD_OP_READ: return "read"; |  | ||||||
| 	case CEPH_OSD_OP_STAT: return "stat"; |  | ||||||
| 
 |  | ||||||
| 	case CEPH_OSD_OP_MASKTRUNC: return "masktrunc"; |  | ||||||
| 
 |  | ||||||
| 	case CEPH_OSD_OP_WRITE: return "write"; |  | ||||||
| 	case CEPH_OSD_OP_DELETE: return "delete"; |  | ||||||
| 	case CEPH_OSD_OP_TRUNCATE: return "truncate"; |  | ||||||
| 	case CEPH_OSD_OP_ZERO: return "zero"; |  | ||||||
| 	case CEPH_OSD_OP_WRITEFULL: return "writefull"; |  | ||||||
| 	case CEPH_OSD_OP_ROLLBACK: return "rollback"; |  | ||||||
| 
 |  | ||||||
| 	case CEPH_OSD_OP_APPEND: return "append"; |  | ||||||
| 	case CEPH_OSD_OP_STARTSYNC: return "startsync"; |  | ||||||
| 	case CEPH_OSD_OP_SETTRUNC: return "settrunc"; |  | ||||||
| 	case CEPH_OSD_OP_TRIMTRUNC: return "trimtrunc"; |  | ||||||
| 
 |  | ||||||
| 	case CEPH_OSD_OP_TMAPUP: return "tmapup"; |  | ||||||
| 	case CEPH_OSD_OP_TMAPGET: return "tmapget"; |  | ||||||
| 	case CEPH_OSD_OP_TMAPPUT: return "tmapput"; |  | ||||||
| 
 |  | ||||||
| 	case CEPH_OSD_OP_GETXATTR: return "getxattr"; |  | ||||||
| 	case CEPH_OSD_OP_GETXATTRS: return "getxattrs"; |  | ||||||
| 	case CEPH_OSD_OP_SETXATTR: return "setxattr"; |  | ||||||
| 	case CEPH_OSD_OP_SETXATTRS: return "setxattrs"; |  | ||||||
| 	case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs"; |  | ||||||
| 	case CEPH_OSD_OP_RMXATTR: return "rmxattr"; |  | ||||||
| 	case CEPH_OSD_OP_CMPXATTR: return "cmpxattr"; |  | ||||||
| 
 |  | ||||||
| 	case CEPH_OSD_OP_PULL: return "pull"; |  | ||||||
| 	case CEPH_OSD_OP_PUSH: return "push"; |  | ||||||
| 	case CEPH_OSD_OP_BALANCEREADS: return "balance-reads"; |  | ||||||
| 	case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads"; |  | ||||||
| 	case CEPH_OSD_OP_SCRUB: return "scrub"; |  | ||||||
| 
 |  | ||||||
| 	case CEPH_OSD_OP_WRLOCK: return "wrlock"; |  | ||||||
| 	case CEPH_OSD_OP_WRUNLOCK: return "wrunlock"; |  | ||||||
| 	case CEPH_OSD_OP_RDLOCK: return "rdlock"; |  | ||||||
| 	case CEPH_OSD_OP_RDUNLOCK: return "rdunlock"; |  | ||||||
| 	case CEPH_OSD_OP_UPLOCK: return "uplock"; |  | ||||||
| 	case CEPH_OSD_OP_DNLOCK: return "dnlock"; |  | ||||||
| 
 |  | ||||||
| 	case CEPH_OSD_OP_CALL: return "call"; |  | ||||||
| 
 |  | ||||||
| 	case CEPH_OSD_OP_PGLS: return "pgls"; |  | ||||||
| 	} |  | ||||||
| 	return "???"; |  | ||||||
| } |  | ||||||
| 
 | 
 | ||||||
| const char *ceph_mds_state_name(int s) | const char *ceph_mds_state_name(int s) | ||||||
| { | { | ||||||
| @ -177,17 +115,3 @@ const char *ceph_snap_op_name(int o) | |||||||
| 	} | 	} | ||||||
| 	return "???"; | 	return "???"; | ||||||
| } | } | ||||||
| 
 |  | ||||||
| const char *ceph_pool_op_name(int op) |  | ||||||
| { |  | ||||||
| 	switch (op) { |  | ||||||
| 	case POOL_OP_CREATE: return "create"; |  | ||||||
| 	case POOL_OP_DELETE: return "delete"; |  | ||||||
| 	case POOL_OP_AUID_CHANGE: return "auid change"; |  | ||||||
| 	case POOL_OP_CREATE_SNAP: return "create snap"; |  | ||||||
| 	case POOL_OP_DELETE_SNAP: return "delete snap"; |  | ||||||
| 	case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap"; |  | ||||||
| 	case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap"; |  | ||||||
| 	} |  | ||||||
| 	return "???"; |  | ||||||
| } |  | ||||||
							
								
								
									
										1180
									
								
								fs/ceph/super.c
									
									
									
									
									
								
							
							
						
						
									
										1180
									
								
								fs/ceph/super.c
									
									
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										420
									
								
								fs/ceph/super.h
									
									
									
									
									
								
							
							
						
						
									
										420
									
								
								fs/ceph/super.h
									
									
									
									
									
								
							| @ -1,7 +1,7 @@ | |||||||
| #ifndef _FS_CEPH_SUPER_H | #ifndef _FS_CEPH_SUPER_H | ||||||
| #define _FS_CEPH_SUPER_H | #define _FS_CEPH_SUPER_H | ||||||
| 
 | 
 | ||||||
| #include "ceph_debug.h" | #include <linux/ceph/ceph_debug.h> | ||||||
| 
 | 
 | ||||||
| #include <asm/unaligned.h> | #include <asm/unaligned.h> | ||||||
| #include <linux/backing-dev.h> | #include <linux/backing-dev.h> | ||||||
| @ -14,13 +14,7 @@ | |||||||
| #include <linux/writeback.h> | #include <linux/writeback.h> | ||||||
| #include <linux/slab.h> | #include <linux/slab.h> | ||||||
| 
 | 
 | ||||||
| #include "types.h" | #include <linux/ceph/libceph.h> | ||||||
| #include "messenger.h" |  | ||||||
| #include "msgpool.h" |  | ||||||
| #include "mon_client.h" |  | ||||||
| #include "mds_client.h" |  | ||||||
| #include "osd_client.h" |  | ||||||
| #include "ceph_fs.h" |  | ||||||
| 
 | 
 | ||||||
| /* f_type in struct statfs */ | /* f_type in struct statfs */ | ||||||
| #define CEPH_SUPER_MAGIC 0x00c36400 | #define CEPH_SUPER_MAGIC 0x00c36400 | ||||||
| @ -30,42 +24,25 @@ | |||||||
| #define CEPH_BLOCK_SHIFT   20  /* 1 MB */ | #define CEPH_BLOCK_SHIFT   20  /* 1 MB */ | ||||||
| #define CEPH_BLOCK         (1 << CEPH_BLOCK_SHIFT) | #define CEPH_BLOCK         (1 << CEPH_BLOCK_SHIFT) | ||||||
| 
 | 
 | ||||||
| /*
 | #define CEPH_MOUNT_OPT_DIRSTAT         (1<<4) /* `cat dirname` for stats */ | ||||||
|  * Supported features | #define CEPH_MOUNT_OPT_RBYTES          (1<<5) /* dir st_bytes = rbytes */ | ||||||
|  */ | #define CEPH_MOUNT_OPT_NOASYNCREADDIR  (1<<7) /* no dcache readdir */ | ||||||
| #define CEPH_FEATURE_SUPPORTED CEPH_FEATURE_NOSRCADDR | CEPH_FEATURE_FLOCK |  | ||||||
| #define CEPH_FEATURE_REQUIRED  CEPH_FEATURE_NOSRCADDR |  | ||||||
| 
 | 
 | ||||||
| /*
 | #define CEPH_MOUNT_OPT_DEFAULT    (CEPH_MOUNT_OPT_RBYTES) | ||||||
|  * mount options |  | ||||||
|  */ |  | ||||||
| #define CEPH_OPT_FSID             (1<<0) |  | ||||||
| #define CEPH_OPT_NOSHARE          (1<<1) /* don't share client with other sbs */ |  | ||||||
| #define CEPH_OPT_MYIP             (1<<2) /* specified my ip */ |  | ||||||
| #define CEPH_OPT_DIRSTAT          (1<<4) /* funky `cat dirname` for stats */ |  | ||||||
| #define CEPH_OPT_RBYTES           (1<<5) /* dir st_bytes = rbytes */ |  | ||||||
| #define CEPH_OPT_NOCRC            (1<<6) /* no data crc on writes */ |  | ||||||
| #define CEPH_OPT_NOASYNCREADDIR   (1<<7) /* no dcache readdir */ |  | ||||||
| 
 | 
 | ||||||
| #define CEPH_OPT_DEFAULT   (CEPH_OPT_RBYTES) | #define ceph_set_mount_opt(fsc, opt) \ | ||||||
|  | 	(fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt; | ||||||
|  | #define ceph_test_mount_opt(fsc, opt) \ | ||||||
|  | 	(!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt)) | ||||||
| 
 | 
 | ||||||
| #define ceph_set_opt(client, opt) \ | #define CEPH_MAX_READDIR_DEFAULT        1024 | ||||||
| 	(client)->mount_args->flags |= CEPH_OPT_##opt; | #define CEPH_MAX_READDIR_BYTES_DEFAULT  (512*1024) | ||||||
| #define ceph_test_opt(client, opt) \ | #define CEPH_SNAPDIRNAME_DEFAULT        ".snap" | ||||||
| 	(!!((client)->mount_args->flags & CEPH_OPT_##opt)) |  | ||||||
| 
 | 
 | ||||||
| 
 | struct ceph_mount_options { | ||||||
| struct ceph_mount_args { |  | ||||||
| 	int sb_flags; |  | ||||||
| 	int flags; | 	int flags; | ||||||
| 	struct ceph_fsid fsid; | 	int sb_flags; | ||||||
| 	struct ceph_entity_addr my_addr; | 
 | ||||||
| 	int num_mon; |  | ||||||
| 	struct ceph_entity_addr *mon_addr; |  | ||||||
| 	int mount_timeout; |  | ||||||
| 	int osd_idle_ttl; |  | ||||||
| 	int osd_timeout; |  | ||||||
| 	int osd_keepalive_timeout; |  | ||||||
| 	int wsize; | 	int wsize; | ||||||
| 	int rsize;            /* max readahead */ | 	int rsize;            /* max readahead */ | ||||||
| 	int congestion_kb;    /* max writeback in flight */ | 	int congestion_kb;    /* max writeback in flight */ | ||||||
| @ -73,82 +50,25 @@ struct ceph_mount_args { | |||||||
| 	int cap_release_safety; | 	int cap_release_safety; | ||||||
| 	int max_readdir;       /* max readdir result (entries) */ | 	int max_readdir;       /* max readdir result (entries) */ | ||||||
| 	int max_readdir_bytes; /* max readdir result (bytes) */ | 	int max_readdir_bytes; /* max readdir result (bytes) */ | ||||||
|  | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * everything above this point can be memcmp'd; everything below | ||||||
|  | 	 * is handled in compare_mount_options() | ||||||
|  | 	 */ | ||||||
|  | 
 | ||||||
| 	char *snapdir_name;   /* default ".snap" */ | 	char *snapdir_name;   /* default ".snap" */ | ||||||
| 	char *name; |  | ||||||
| 	char *secret; |  | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| /*
 | struct ceph_fs_client { | ||||||
|  * defaults |  | ||||||
|  */ |  | ||||||
| #define CEPH_MOUNT_TIMEOUT_DEFAULT  60 |  | ||||||
| #define CEPH_OSD_TIMEOUT_DEFAULT    60  /* seconds */ |  | ||||||
| #define CEPH_OSD_KEEPALIVE_DEFAULT  5 |  | ||||||
| #define CEPH_OSD_IDLE_TTL_DEFAULT    60 |  | ||||||
| #define CEPH_MOUNT_RSIZE_DEFAULT    (512*1024) /* readahead */ |  | ||||||
| #define CEPH_MAX_READDIR_DEFAULT    1024 |  | ||||||
| #define CEPH_MAX_READDIR_BYTES_DEFAULT    (512*1024) |  | ||||||
| 
 |  | ||||||
| #define CEPH_MSG_MAX_FRONT_LEN	(16*1024*1024) |  | ||||||
| #define CEPH_MSG_MAX_DATA_LEN	(16*1024*1024) |  | ||||||
| 
 |  | ||||||
| #define CEPH_SNAPDIRNAME_DEFAULT ".snap" |  | ||||||
| #define CEPH_AUTH_NAME_DEFAULT   "guest" |  | ||||||
| /*
 |  | ||||||
|  * Delay telling the MDS we no longer want caps, in case we reopen |  | ||||||
|  * the file.  Delay a minimum amount of time, even if we send a cap |  | ||||||
|  * message for some other reason.  Otherwise, take the opportunity to |  | ||||||
|  * update the mds to avoid sending another message later. |  | ||||||
|  */ |  | ||||||
| #define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT      5  /* cap release delay */ |  | ||||||
| #define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT     60  /* cap release delay */ |  | ||||||
| 
 |  | ||||||
| #define CEPH_CAP_RELEASE_SAFETY_DEFAULT        (CEPH_CAPS_PER_RELEASE * 4) |  | ||||||
| 
 |  | ||||||
| /* mount state */ |  | ||||||
| enum { |  | ||||||
| 	CEPH_MOUNT_MOUNTING, |  | ||||||
| 	CEPH_MOUNT_MOUNTED, |  | ||||||
| 	CEPH_MOUNT_UNMOUNTING, |  | ||||||
| 	CEPH_MOUNT_UNMOUNTED, |  | ||||||
| 	CEPH_MOUNT_SHUTDOWN, |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * subtract jiffies |  | ||||||
|  */ |  | ||||||
| static inline unsigned long time_sub(unsigned long a, unsigned long b) |  | ||||||
| { |  | ||||||
| 	BUG_ON(time_after(b, a)); |  | ||||||
| 	return (long)a - (long)b; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * per-filesystem client state |  | ||||||
|  * |  | ||||||
|  * possibly shared by multiple mount points, if they are |  | ||||||
|  * mounting the same ceph filesystem/cluster. |  | ||||||
|  */ |  | ||||||
| struct ceph_client { |  | ||||||
| 	struct ceph_fsid fsid; |  | ||||||
| 	bool have_fsid; |  | ||||||
| 
 |  | ||||||
| 	struct mutex mount_mutex;       /* serialize mount attempts */ |  | ||||||
| 	struct ceph_mount_args *mount_args; |  | ||||||
| 
 |  | ||||||
| 	struct super_block *sb; | 	struct super_block *sb; | ||||||
| 
 | 
 | ||||||
|  | 	struct ceph_mount_options *mount_options; | ||||||
|  | 	struct ceph_client *client; | ||||||
|  | 
 | ||||||
| 	unsigned long mount_state; | 	unsigned long mount_state; | ||||||
| 	wait_queue_head_t auth_wq; |  | ||||||
| 
 |  | ||||||
| 	int auth_err; |  | ||||||
| 
 |  | ||||||
| 	int min_caps;                  /* min caps i added */ | 	int min_caps;                  /* min caps i added */ | ||||||
| 
 | 
 | ||||||
| 	struct ceph_messenger *msgr;   /* messenger instance */ | 	struct ceph_mds_client *mdsc; | ||||||
| 	struct ceph_mon_client monc; |  | ||||||
| 	struct ceph_mds_client mdsc; |  | ||||||
| 	struct ceph_osd_client osdc; |  | ||||||
| 
 | 
 | ||||||
| 	/* writeback */ | 	/* writeback */ | ||||||
| 	mempool_t *wb_pagevec_pool; | 	mempool_t *wb_pagevec_pool; | ||||||
| @ -160,14 +80,14 @@ struct ceph_client { | |||||||
| 	struct backing_dev_info backing_dev_info; | 	struct backing_dev_info backing_dev_info; | ||||||
| 
 | 
 | ||||||
| #ifdef CONFIG_DEBUG_FS | #ifdef CONFIG_DEBUG_FS | ||||||
| 	struct dentry *debugfs_monmap; | 	struct dentry *debugfs_dentry_lru, *debugfs_caps; | ||||||
| 	struct dentry *debugfs_mdsmap, *debugfs_osdmap; |  | ||||||
| 	struct dentry *debugfs_dir, *debugfs_dentry_lru, *debugfs_caps; |  | ||||||
| 	struct dentry *debugfs_congestion_kb; | 	struct dentry *debugfs_congestion_kb; | ||||||
| 	struct dentry *debugfs_bdi; | 	struct dentry *debugfs_bdi; | ||||||
|  | 	struct dentry *debugfs_mdsc, *debugfs_mdsmap; | ||||||
| #endif | #endif | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * File i/o capability.  This tracks shared state with the metadata |  * File i/o capability.  This tracks shared state with the metadata | ||||||
|  * server that allows us to cache or writeback attributes or to read |  * server that allows us to cache or writeback attributes or to read | ||||||
| @ -275,6 +195,20 @@ struct ceph_inode_xattr { | |||||||
| 	int should_free_val; | 	int should_free_val; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | /*
 | ||||||
|  |  * Ceph dentry state | ||||||
|  |  */ | ||||||
|  | struct ceph_dentry_info { | ||||||
|  | 	struct ceph_mds_session *lease_session; | ||||||
|  | 	u32 lease_gen, lease_shared_gen; | ||||||
|  | 	u32 lease_seq; | ||||||
|  | 	unsigned long lease_renew_after, lease_renew_from; | ||||||
|  | 	struct list_head lru; | ||||||
|  | 	struct dentry *dentry; | ||||||
|  | 	u64 time; | ||||||
|  | 	u64 offset; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| struct ceph_inode_xattrs_info { | struct ceph_inode_xattrs_info { | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * (still encoded) xattr blob. we avoid the overhead of parsing | 	 * (still encoded) xattr blob. we avoid the overhead of parsing | ||||||
| @ -296,11 +230,6 @@ struct ceph_inode_xattrs_info { | |||||||
| /*
 | /*
 | ||||||
|  * Ceph inode. |  * Ceph inode. | ||||||
|  */ |  */ | ||||||
| #define CEPH_I_COMPLETE  1  /* we have complete directory cached */ |  | ||||||
| #define CEPH_I_NODELAY   4  /* do not delay cap release */ |  | ||||||
| #define CEPH_I_FLUSH     8  /* do not delay flush of dirty metadata */ |  | ||||||
| #define CEPH_I_NOFLUSH  16  /* do not flush dirty caps */ |  | ||||||
| 
 |  | ||||||
| struct ceph_inode_info { | struct ceph_inode_info { | ||||||
| 	struct ceph_vino i_vino;   /* ceph ino + snap */ | 	struct ceph_vino i_vino;   /* ceph ino + snap */ | ||||||
| 
 | 
 | ||||||
| @ -391,69 +320,9 @@ static inline struct ceph_inode_info *ceph_inode(struct inode *inode) | |||||||
| 	return container_of(inode, struct ceph_inode_info, vfs_inode); | 	return container_of(inode, struct ceph_inode_info, vfs_inode); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline void ceph_i_clear(struct inode *inode, unsigned mask) | static inline struct ceph_vino ceph_vino(struct inode *inode) | ||||||
| { | { | ||||||
| 	struct ceph_inode_info *ci = ceph_inode(inode); | 	return ceph_inode(inode)->i_vino; | ||||||
| 
 |  | ||||||
| 	spin_lock(&inode->i_lock); |  | ||||||
| 	ci->i_ceph_flags &= ~mask; |  | ||||||
| 	spin_unlock(&inode->i_lock); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static inline void ceph_i_set(struct inode *inode, unsigned mask) |  | ||||||
| { |  | ||||||
| 	struct ceph_inode_info *ci = ceph_inode(inode); |  | ||||||
| 
 |  | ||||||
| 	spin_lock(&inode->i_lock); |  | ||||||
| 	ci->i_ceph_flags |= mask; |  | ||||||
| 	spin_unlock(&inode->i_lock); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static inline bool ceph_i_test(struct inode *inode, unsigned mask) |  | ||||||
| { |  | ||||||
| 	struct ceph_inode_info *ci = ceph_inode(inode); |  | ||||||
| 	bool r; |  | ||||||
| 
 |  | ||||||
| 	smp_mb(); |  | ||||||
| 	r = (ci->i_ceph_flags & mask) == mask; |  | ||||||
| 	return r; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| /* find a specific frag @f */ |  | ||||||
| extern struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, |  | ||||||
| 						u32 f); |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * choose fragment for value @v.  copy frag content to pfrag, if leaf |  | ||||||
|  * exists |  | ||||||
|  */ |  | ||||||
| extern u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, |  | ||||||
| 			    struct ceph_inode_frag *pfrag, |  | ||||||
| 			    int *found); |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * Ceph dentry state |  | ||||||
|  */ |  | ||||||
| struct ceph_dentry_info { |  | ||||||
| 	struct ceph_mds_session *lease_session; |  | ||||||
| 	u32 lease_gen, lease_shared_gen; |  | ||||||
| 	u32 lease_seq; |  | ||||||
| 	unsigned long lease_renew_after, lease_renew_from; |  | ||||||
| 	struct list_head lru; |  | ||||||
| 	struct dentry *dentry; |  | ||||||
| 	u64 time; |  | ||||||
| 	u64 offset; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| static inline struct ceph_dentry_info *ceph_dentry(struct dentry *dentry) |  | ||||||
| { |  | ||||||
| 	return (struct ceph_dentry_info *)dentry->d_fsdata; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static inline loff_t ceph_make_fpos(unsigned frag, unsigned off) |  | ||||||
| { |  | ||||||
| 	return ((loff_t)frag << 32) | (loff_t)off; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
| @ -472,18 +341,6 @@ static inline ino_t ceph_vino_to_ino(struct ceph_vino vino) | |||||||
| 	return ino; | 	return ino; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline int ceph_set_ino_cb(struct inode *inode, void *data) |  | ||||||
| { |  | ||||||
| 	ceph_inode(inode)->i_vino = *(struct ceph_vino *)data; |  | ||||||
| 	inode->i_ino = ceph_vino_to_ino(*(struct ceph_vino *)data); |  | ||||||
| 	return 0; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static inline struct ceph_vino ceph_vino(struct inode *inode) |  | ||||||
| { |  | ||||||
| 	return ceph_inode(inode)->i_vino; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /* for printf-style formatting */ | /* for printf-style formatting */ | ||||||
| #define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap | #define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap | ||||||
| 
 | 
 | ||||||
| @ -512,6 +369,73 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, | |||||||
| } | } | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | /*
 | ||||||
|  |  * Ceph inode. | ||||||
|  |  */ | ||||||
|  | #define CEPH_I_COMPLETE  1  /* we have complete directory cached */ | ||||||
|  | #define CEPH_I_NODELAY   4  /* do not delay cap release */ | ||||||
|  | #define CEPH_I_FLUSH     8  /* do not delay flush of dirty metadata */ | ||||||
|  | #define CEPH_I_NOFLUSH  16  /* do not flush dirty caps */ | ||||||
|  | 
 | ||||||
|  | static inline void ceph_i_clear(struct inode *inode, unsigned mask) | ||||||
|  | { | ||||||
|  | 	struct ceph_inode_info *ci = ceph_inode(inode); | ||||||
|  | 
 | ||||||
|  | 	spin_lock(&inode->i_lock); | ||||||
|  | 	ci->i_ceph_flags &= ~mask; | ||||||
|  | 	spin_unlock(&inode->i_lock); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline void ceph_i_set(struct inode *inode, unsigned mask) | ||||||
|  | { | ||||||
|  | 	struct ceph_inode_info *ci = ceph_inode(inode); | ||||||
|  | 
 | ||||||
|  | 	spin_lock(&inode->i_lock); | ||||||
|  | 	ci->i_ceph_flags |= mask; | ||||||
|  | 	spin_unlock(&inode->i_lock); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline bool ceph_i_test(struct inode *inode, unsigned mask) | ||||||
|  | { | ||||||
|  | 	struct ceph_inode_info *ci = ceph_inode(inode); | ||||||
|  | 	bool r; | ||||||
|  | 
 | ||||||
|  | 	spin_lock(&inode->i_lock); | ||||||
|  | 	r = (ci->i_ceph_flags & mask) == mask; | ||||||
|  | 	spin_unlock(&inode->i_lock); | ||||||
|  | 	return r; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /* find a specific frag @f */ | ||||||
|  | extern struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, | ||||||
|  | 						u32 f); | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * choose fragment for value @v.  copy frag content to pfrag, if leaf | ||||||
|  |  * exists | ||||||
|  |  */ | ||||||
|  | extern u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, | ||||||
|  | 			    struct ceph_inode_frag *pfrag, | ||||||
|  | 			    int *found); | ||||||
|  | 
 | ||||||
|  | static inline struct ceph_dentry_info *ceph_dentry(struct dentry *dentry) | ||||||
|  | { | ||||||
|  | 	return (struct ceph_dentry_info *)dentry->d_fsdata; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline loff_t ceph_make_fpos(unsigned frag, unsigned off) | ||||||
|  | { | ||||||
|  | 	return ((loff_t)frag << 32) | (loff_t)off; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline int ceph_set_ino_cb(struct inode *inode, void *data) | ||||||
|  | { | ||||||
|  | 	ceph_inode(inode)->i_vino = *(struct ceph_vino *)data; | ||||||
|  | 	inode->i_ino = ceph_vino_to_ino(*(struct ceph_vino *)data); | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * caps helpers |  * caps helpers | ||||||
|  */ |  */ | ||||||
| @ -576,18 +500,18 @@ extern int ceph_reserve_caps(struct ceph_mds_client *mdsc, | |||||||
| 			     struct ceph_cap_reservation *ctx, int need); | 			     struct ceph_cap_reservation *ctx, int need); | ||||||
| extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc, | extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc, | ||||||
| 			       struct ceph_cap_reservation *ctx); | 			       struct ceph_cap_reservation *ctx); | ||||||
| extern void ceph_reservation_status(struct ceph_client *client, | extern void ceph_reservation_status(struct ceph_fs_client *client, | ||||||
| 				    int *total, int *avail, int *used, | 				    int *total, int *avail, int *used, | ||||||
| 				    int *reserved, int *min); | 				    int *reserved, int *min); | ||||||
| 
 | 
 | ||||||
| static inline struct ceph_client *ceph_inode_to_client(struct inode *inode) | static inline struct ceph_fs_client *ceph_inode_to_client(struct inode *inode) | ||||||
| { | { | ||||||
| 	return (struct ceph_client *)inode->i_sb->s_fs_info; | 	return (struct ceph_fs_client *)inode->i_sb->s_fs_info; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline struct ceph_client *ceph_sb_to_client(struct super_block *sb) | static inline struct ceph_fs_client *ceph_sb_to_client(struct super_block *sb) | ||||||
| { | { | ||||||
| 	return (struct ceph_client *)sb->s_fs_info; | 	return (struct ceph_fs_client *)sb->s_fs_info; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @ -616,51 +540,6 @@ struct ceph_file_info { | |||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| /*
 |  | ||||||
|  * snapshots |  | ||||||
|  */ |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * A "snap context" is the set of existing snapshots when we |  | ||||||
|  * write data.  It is used by the OSD to guide its COW behavior. |  | ||||||
|  * |  | ||||||
|  * The ceph_snap_context is refcounted, and attached to each dirty |  | ||||||
|  * page, indicating which context the dirty data belonged when it was |  | ||||||
|  * dirtied. |  | ||||||
|  */ |  | ||||||
| struct ceph_snap_context { |  | ||||||
| 	atomic_t nref; |  | ||||||
| 	u64 seq; |  | ||||||
| 	int num_snaps; |  | ||||||
| 	u64 snaps[]; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| static inline struct ceph_snap_context * |  | ||||||
| ceph_get_snap_context(struct ceph_snap_context *sc) |  | ||||||
| { |  | ||||||
| 	/*
 |  | ||||||
| 	printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), |  | ||||||
| 	       atomic_read(&sc->nref)+1); |  | ||||||
| 	*/ |  | ||||||
| 	if (sc) |  | ||||||
| 		atomic_inc(&sc->nref); |  | ||||||
| 	return sc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static inline void ceph_put_snap_context(struct ceph_snap_context *sc) |  | ||||||
| { |  | ||||||
| 	if (!sc) |  | ||||||
| 		return; |  | ||||||
| 	/*
 |  | ||||||
| 	printk("put_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), |  | ||||||
| 	       atomic_read(&sc->nref)-1); |  | ||||||
| 	*/ |  | ||||||
| 	if (atomic_dec_and_test(&sc->nref)) { |  | ||||||
| 		/*printk(" deleting snap_context %p\n", sc);*/ |  | ||||||
| 		kfree(sc); |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 | /*
 | ||||||
|  * A "snap realm" describes a subset of the file hierarchy sharing |  * A "snap realm" describes a subset of the file hierarchy sharing | ||||||
|  * the same set of snapshots that apply to it.  The realms themselves |  * the same set of snapshots that apply to it.  The realms themselves | ||||||
| @ -699,16 +578,33 @@ struct ceph_snap_realm { | |||||||
| 	spinlock_t inodes_with_caps_lock; | 	spinlock_t inodes_with_caps_lock; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| 
 | static inline int default_congestion_kb(void) | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * calculate the number of pages a given length and offset map onto, |  | ||||||
|  * if we align the data. |  | ||||||
|  */ |  | ||||||
| static inline int calc_pages_for(u64 off, u64 len) |  | ||||||
| { | { | ||||||
| 	return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) - | 	int congestion_kb; | ||||||
| 		(off >> PAGE_CACHE_SHIFT); | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * Copied from NFS | ||||||
|  | 	 * | ||||||
|  | 	 * congestion size, scale with available memory. | ||||||
|  | 	 * | ||||||
|  | 	 *  64MB:    8192k | ||||||
|  | 	 * 128MB:   11585k | ||||||
|  | 	 * 256MB:   16384k | ||||||
|  | 	 * 512MB:   23170k | ||||||
|  | 	 *   1GB:   32768k | ||||||
|  | 	 *   2GB:   46340k | ||||||
|  | 	 *   4GB:   65536k | ||||||
|  | 	 *   8GB:   92681k | ||||||
|  | 	 *  16GB:  131072k | ||||||
|  | 	 * | ||||||
|  | 	 * This allows larger machines to have larger/more transfers. | ||||||
|  | 	 * Limit the default to 256M | ||||||
|  | 	 */ | ||||||
|  | 	congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); | ||||||
|  | 	if (congestion_kb > 256*1024) | ||||||
|  | 		congestion_kb = 256*1024; | ||||||
|  | 
 | ||||||
|  | 	return congestion_kb; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @ -741,16 +637,6 @@ static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci) | |||||||
| 			   ci_item)->writing; | 			   ci_item)->writing; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
| /* super.c */ |  | ||||||
| extern struct kmem_cache *ceph_inode_cachep; |  | ||||||
| extern struct kmem_cache *ceph_cap_cachep; |  | ||||||
| extern struct kmem_cache *ceph_dentry_cachep; |  | ||||||
| extern struct kmem_cache *ceph_file_cachep; |  | ||||||
| 
 |  | ||||||
| extern const char *ceph_msg_type_name(int type); |  | ||||||
| extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); |  | ||||||
| 
 |  | ||||||
| /* inode.c */ | /* inode.c */ | ||||||
| extern const struct inode_operations ceph_file_iops; | extern const struct inode_operations ceph_file_iops; | ||||||
| 
 | 
 | ||||||
| @ -857,12 +743,18 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma); | |||||||
| /* file.c */ | /* file.c */ | ||||||
| extern const struct file_operations ceph_file_fops; | extern const struct file_operations ceph_file_fops; | ||||||
| extern const struct address_space_operations ceph_aops; | extern const struct address_space_operations ceph_aops; | ||||||
|  | extern int ceph_copy_to_page_vector(struct page **pages, | ||||||
|  | 				    const char *data, | ||||||
|  | 				    loff_t off, size_t len); | ||||||
|  | extern int ceph_copy_from_page_vector(struct page **pages, | ||||||
|  | 				    char *data, | ||||||
|  | 				    loff_t off, size_t len); | ||||||
|  | extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); | ||||||
| extern int ceph_open(struct inode *inode, struct file *file); | extern int ceph_open(struct inode *inode, struct file *file); | ||||||
| extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, | extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, | ||||||
| 				       struct nameidata *nd, int mode, | 				       struct nameidata *nd, int mode, | ||||||
| 				       int locked_dir); | 				       int locked_dir); | ||||||
| extern int ceph_release(struct inode *inode, struct file *filp); | extern int ceph_release(struct inode *inode, struct file *filp); | ||||||
| extern void ceph_release_page_vector(struct page **pages, int num_pages); |  | ||||||
| 
 | 
 | ||||||
| /* dir.c */ | /* dir.c */ | ||||||
| extern const struct file_operations ceph_dir_fops; | extern const struct file_operations ceph_dir_fops; | ||||||
| @ -892,12 +784,6 @@ extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | |||||||
| /* export.c */ | /* export.c */ | ||||||
| extern const struct export_operations ceph_export_ops; | extern const struct export_operations ceph_export_ops; | ||||||
| 
 | 
 | ||||||
| /* debugfs.c */ |  | ||||||
| extern int ceph_debugfs_init(void); |  | ||||||
| extern void ceph_debugfs_cleanup(void); |  | ||||||
| extern int ceph_debugfs_client_init(struct ceph_client *client); |  | ||||||
| extern void ceph_debugfs_client_cleanup(struct ceph_client *client); |  | ||||||
| 
 |  | ||||||
| /* locks.c */ | /* locks.c */ | ||||||
| extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); | extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); | ||||||
| extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); | extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); | ||||||
| @ -914,4 +800,8 @@ static inline struct inode *get_dentry_parent_inode(struct dentry *dentry) | |||||||
| 	return NULL; | 	return NULL; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /* debugfs.c */ | ||||||
|  | extern int ceph_fs_debugfs_init(struct ceph_fs_client *client); | ||||||
|  | extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client); | ||||||
|  | 
 | ||||||
| #endif /* _FS_CEPH_SUPER_H */ | #endif /* _FS_CEPH_SUPER_H */ | ||||||
|  | |||||||
| @ -1,6 +1,9 @@ | |||||||
| #include "ceph_debug.h" | #include <linux/ceph/ceph_debug.h> | ||||||
|  | 
 | ||||||
| #include "super.h" | #include "super.h" | ||||||
| #include "decode.h" | #include "mds_client.h" | ||||||
|  | 
 | ||||||
|  | #include <linux/ceph/decode.h> | ||||||
| 
 | 
 | ||||||
| #include <linux/xattr.h> | #include <linux/xattr.h> | ||||||
| #include <linux/slab.h> | #include <linux/slab.h> | ||||||
| @ -620,12 +623,12 @@ out: | |||||||
| static int ceph_sync_setxattr(struct dentry *dentry, const char *name, | static int ceph_sync_setxattr(struct dentry *dentry, const char *name, | ||||||
| 			      const char *value, size_t size, int flags) | 			      const char *value, size_t size, int flags) | ||||||
| { | { | ||||||
| 	struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); | 	struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); | ||||||
| 	struct inode *inode = dentry->d_inode; | 	struct inode *inode = dentry->d_inode; | ||||||
| 	struct ceph_inode_info *ci = ceph_inode(inode); | 	struct ceph_inode_info *ci = ceph_inode(inode); | ||||||
| 	struct inode *parent_inode = dentry->d_parent->d_inode; | 	struct inode *parent_inode = dentry->d_parent->d_inode; | ||||||
| 	struct ceph_mds_request *req; | 	struct ceph_mds_request *req; | ||||||
| 	struct ceph_mds_client *mdsc = &client->mdsc; | 	struct ceph_mds_client *mdsc = fsc->mdsc; | ||||||
| 	int err; | 	int err; | ||||||
| 	int i, nr_pages; | 	int i, nr_pages; | ||||||
| 	struct page **pages = NULL; | 	struct page **pages = NULL; | ||||||
| @ -713,10 +716,9 @@ int ceph_setxattr(struct dentry *dentry, const char *name, | |||||||
| 
 | 
 | ||||||
| 	/* preallocate memory for xattr name, value, index node */ | 	/* preallocate memory for xattr name, value, index node */ | ||||||
| 	err = -ENOMEM; | 	err = -ENOMEM; | ||||||
| 	newname = kmalloc(name_len + 1, GFP_NOFS); | 	newname = kmemdup(name, name_len + 1, GFP_NOFS); | ||||||
| 	if (!newname) | 	if (!newname) | ||||||
| 		goto out; | 		goto out; | ||||||
| 	memcpy(newname, name, name_len + 1); |  | ||||||
| 
 | 
 | ||||||
| 	if (val_len) { | 	if (val_len) { | ||||||
| 		newval = kmalloc(val_len + 1, GFP_NOFS); | 		newval = kmalloc(val_len + 1, GFP_NOFS); | ||||||
| @ -777,8 +779,8 @@ out: | |||||||
| 
 | 
 | ||||||
| static int ceph_send_removexattr(struct dentry *dentry, const char *name) | static int ceph_send_removexattr(struct dentry *dentry, const char *name) | ||||||
| { | { | ||||||
| 	struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); | 	struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); | ||||||
| 	struct ceph_mds_client *mdsc = &client->mdsc; | 	struct ceph_mds_client *mdsc = fsc->mdsc; | ||||||
| 	struct inode *inode = dentry->d_inode; | 	struct inode *inode = dentry->d_inode; | ||||||
| 	struct inode *parent_inode = dentry->d_parent->d_inode; | 	struct inode *parent_inode = dentry->d_parent->d_inode; | ||||||
| 	struct ceph_mds_request *req; | 	struct ceph_mds_request *req; | ||||||
|  | |||||||
| @ -1,8 +1,8 @@ | |||||||
| #ifndef _FS_CEPH_AUTH_H | #ifndef _FS_CEPH_AUTH_H | ||||||
| #define _FS_CEPH_AUTH_H | #define _FS_CEPH_AUTH_H | ||||||
| 
 | 
 | ||||||
| #include "types.h" | #include <linux/ceph/types.h> | ||||||
| #include "buffer.h" | #include <linux/ceph/buffer.h> | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Abstract interface for communicating with the authenticate module. |  * Abstract interface for communicating with the authenticate module. | ||||||
| @ -3,7 +3,7 @@ | |||||||
| 
 | 
 | ||||||
| #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||||||
| 
 | 
 | ||||||
| #ifdef CONFIG_CEPH_FS_PRETTYDEBUG | #ifdef CONFIG_CEPH_LIB_PRETTYDEBUG | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * wrap pr_debug to include a filename:lineno prefix on each line. |  * wrap pr_debug to include a filename:lineno prefix on each line. | ||||||
| @ -14,7 +14,8 @@ | |||||||
| # if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG) | # if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG) | ||||||
| extern const char *ceph_file_part(const char *s, int len); | extern const char *ceph_file_part(const char *s, int len); | ||||||
| #  define dout(fmt, ...)						\ | #  define dout(fmt, ...)						\ | ||||||
| 	pr_debug(" %12.12s:%-4d : " fmt,				\ | 	pr_debug("%.*s %12.12s:%-4d : " fmt,				\ | ||||||
|  | 		 8 - (int)sizeof(KBUILD_MODNAME), "    ",		\ | ||||||
| 		 ceph_file_part(__FILE__, sizeof(__FILE__)),		\ | 		 ceph_file_part(__FILE__, sizeof(__FILE__)),		\ | ||||||
| 		 __LINE__, ##__VA_ARGS__) | 		 __LINE__, ##__VA_ARGS__) | ||||||
| # else | # else | ||||||
| @ -299,6 +299,7 @@ enum { | |||||||
| 	CEPH_MDS_OP_SETATTR    = 0x01108, | 	CEPH_MDS_OP_SETATTR    = 0x01108, | ||||||
| 	CEPH_MDS_OP_SETFILELOCK= 0x01109, | 	CEPH_MDS_OP_SETFILELOCK= 0x01109, | ||||||
| 	CEPH_MDS_OP_GETFILELOCK= 0x00110, | 	CEPH_MDS_OP_GETFILELOCK= 0x00110, | ||||||
|  | 	CEPH_MDS_OP_SETDIRLAYOUT=0x0110a, | ||||||
| 
 | 
 | ||||||
| 	CEPH_MDS_OP_MKNOD      = 0x01201, | 	CEPH_MDS_OP_MKNOD      = 0x01201, | ||||||
| 	CEPH_MDS_OP_LINK       = 0x01202, | 	CEPH_MDS_OP_LINK       = 0x01202, | ||||||
							
								
								
									
										33
									
								
								include/linux/ceph/debugfs.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								include/linux/ceph/debugfs.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,33 @@ | |||||||
|  | #ifndef _FS_CEPH_DEBUGFS_H | ||||||
|  | #define _FS_CEPH_DEBUGFS_H | ||||||
|  | 
 | ||||||
|  | #include "ceph_debug.h" | ||||||
|  | #include "types.h" | ||||||
|  | 
 | ||||||
|  | #define CEPH_DEFINE_SHOW_FUNC(name)					\ | ||||||
|  | static int name##_open(struct inode *inode, struct file *file)		\ | ||||||
|  | {									\ | ||||||
|  | 	struct seq_file *sf;						\ | ||||||
|  | 	int ret;							\ | ||||||
|  | 									\ | ||||||
|  | 	ret = single_open(file, name, NULL);				\ | ||||||
|  | 	sf = file->private_data;					\ | ||||||
|  | 	sf->private = inode->i_private;					\ | ||||||
|  | 	return ret;							\ | ||||||
|  | }									\ | ||||||
|  | 									\ | ||||||
|  | static const struct file_operations name##_fops = {			\ | ||||||
|  | 	.open		= name##_open,					\ | ||||||
|  | 	.read		= seq_read,					\ | ||||||
|  | 	.llseek		= seq_lseek,					\ | ||||||
|  | 	.release	= single_release,				\ | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | /* debugfs.c */ | ||||||
|  | extern int ceph_debugfs_init(void); | ||||||
|  | extern void ceph_debugfs_cleanup(void); | ||||||
|  | extern int ceph_debugfs_client_init(struct ceph_client *client); | ||||||
|  | extern void ceph_debugfs_client_cleanup(struct ceph_client *client); | ||||||
|  | 
 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
| @ -191,6 +191,11 @@ static inline void ceph_encode_string(void **p, void *end, | |||||||
| 		ceph_encode_need(p, end, n, bad);		\ | 		ceph_encode_need(p, end, n, bad);		\ | ||||||
| 		ceph_encode_copy(p, pv, n);			\ | 		ceph_encode_copy(p, pv, n);			\ | ||||||
| 	} while (0) | 	} while (0) | ||||||
|  | #define ceph_encode_string_safe(p, end, s, n, bad)		\ | ||||||
|  | 	do {							\ | ||||||
|  | 		ceph_encode_need(p, end, n, bad);		\ | ||||||
|  | 		ceph_encode_string(p, end, s, n);		\ | ||||||
|  | 	} while (0) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| #endif | #endif | ||||||
							
								
								
									
										249
									
								
								include/linux/ceph/libceph.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										249
									
								
								include/linux/ceph/libceph.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,249 @@ | |||||||
|  | #ifndef _FS_CEPH_LIBCEPH_H | ||||||
|  | #define _FS_CEPH_LIBCEPH_H | ||||||
|  | 
 | ||||||
|  | #include "ceph_debug.h" | ||||||
|  | 
 | ||||||
|  | #include <asm/unaligned.h> | ||||||
|  | #include <linux/backing-dev.h> | ||||||
|  | #include <linux/completion.h> | ||||||
|  | #include <linux/exportfs.h> | ||||||
|  | #include <linux/fs.h> | ||||||
|  | #include <linux/mempool.h> | ||||||
|  | #include <linux/pagemap.h> | ||||||
|  | #include <linux/wait.h> | ||||||
|  | #include <linux/writeback.h> | ||||||
|  | #include <linux/slab.h> | ||||||
|  | 
 | ||||||
|  | #include "types.h" | ||||||
|  | #include "messenger.h" | ||||||
|  | #include "msgpool.h" | ||||||
|  | #include "mon_client.h" | ||||||
|  | #include "osd_client.h" | ||||||
|  | #include "ceph_fs.h" | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Supported features | ||||||
|  |  */ | ||||||
|  | #define CEPH_FEATURE_SUPPORTED_DEFAULT CEPH_FEATURE_NOSRCADDR | ||||||
|  | #define CEPH_FEATURE_REQUIRED_DEFAULT  CEPH_FEATURE_NOSRCADDR | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * mount options | ||||||
|  |  */ | ||||||
|  | #define CEPH_OPT_FSID             (1<<0) | ||||||
|  | #define CEPH_OPT_NOSHARE          (1<<1) /* don't share client with other sbs */ | ||||||
|  | #define CEPH_OPT_MYIP             (1<<2) /* specified my ip */ | ||||||
|  | #define CEPH_OPT_NOCRC            (1<<3) /* no data crc on writes */ | ||||||
|  | 
 | ||||||
|  | #define CEPH_OPT_DEFAULT   (0); | ||||||
|  | 
 | ||||||
|  | #define ceph_set_opt(client, opt) \ | ||||||
|  | 	(client)->options->flags |= CEPH_OPT_##opt; | ||||||
|  | #define ceph_test_opt(client, opt) \ | ||||||
|  | 	(!!((client)->options->flags & CEPH_OPT_##opt)) | ||||||
|  | 
 | ||||||
|  | struct ceph_options { | ||||||
|  | 	int flags; | ||||||
|  | 	struct ceph_fsid fsid; | ||||||
|  | 	struct ceph_entity_addr my_addr; | ||||||
|  | 	int mount_timeout; | ||||||
|  | 	int osd_idle_ttl; | ||||||
|  | 	int osd_timeout; | ||||||
|  | 	int osd_keepalive_timeout; | ||||||
|  | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * any type that can't be simply compared or doesn't need | ||||||
|  | 	 * to be compared should go beyond this point, | ||||||
|  | 	 * ceph_compare_options() should be updated accordingly | ||||||
|  | 	 */ | ||||||
|  | 
 | ||||||
|  | 	struct ceph_entity_addr *mon_addr; /* should be the first
 | ||||||
|  | 					      pointer type of args */ | ||||||
|  | 	int num_mon; | ||||||
|  | 	char *name; | ||||||
|  | 	char *secret; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * defaults | ||||||
|  |  */ | ||||||
|  | #define CEPH_MOUNT_TIMEOUT_DEFAULT  60 | ||||||
|  | #define CEPH_OSD_TIMEOUT_DEFAULT    60  /* seconds */ | ||||||
|  | #define CEPH_OSD_KEEPALIVE_DEFAULT  5 | ||||||
|  | #define CEPH_OSD_IDLE_TTL_DEFAULT    60 | ||||||
|  | #define CEPH_MOUNT_RSIZE_DEFAULT    (512*1024) /* readahead */ | ||||||
|  | 
 | ||||||
|  | #define CEPH_MSG_MAX_FRONT_LEN	(16*1024*1024) | ||||||
|  | #define CEPH_MSG_MAX_DATA_LEN	(16*1024*1024) | ||||||
|  | 
 | ||||||
|  | #define CEPH_AUTH_NAME_DEFAULT   "guest" | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Delay telling the MDS we no longer want caps, in case we reopen | ||||||
|  |  * the file.  Delay a minimum amount of time, even if we send a cap | ||||||
|  |  * message for some other reason.  Otherwise, take the opportunity to | ||||||
|  |  * update the mds to avoid sending another message later. | ||||||
|  |  */ | ||||||
|  | #define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT      5  /* cap release delay */ | ||||||
|  | #define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT     60  /* cap release delay */ | ||||||
|  | 
 | ||||||
|  | #define CEPH_CAP_RELEASE_SAFETY_DEFAULT        (CEPH_CAPS_PER_RELEASE * 4) | ||||||
|  | 
 | ||||||
|  | /* mount state */ | ||||||
|  | enum { | ||||||
|  | 	CEPH_MOUNT_MOUNTING, | ||||||
|  | 	CEPH_MOUNT_MOUNTED, | ||||||
|  | 	CEPH_MOUNT_UNMOUNTING, | ||||||
|  | 	CEPH_MOUNT_UNMOUNTED, | ||||||
|  | 	CEPH_MOUNT_SHUTDOWN, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * subtract jiffies | ||||||
|  |  */ | ||||||
|  | static inline unsigned long time_sub(unsigned long a, unsigned long b) | ||||||
|  | { | ||||||
|  | 	BUG_ON(time_after(b, a)); | ||||||
|  | 	return (long)a - (long)b; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | struct ceph_mds_client; | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * per client state | ||||||
|  |  * | ||||||
|  |  * possibly shared by multiple mount points, if they are | ||||||
|  |  * mounting the same ceph filesystem/cluster. | ||||||
|  |  */ | ||||||
|  | struct ceph_client { | ||||||
|  | 	struct ceph_fsid fsid; | ||||||
|  | 	bool have_fsid; | ||||||
|  | 
 | ||||||
|  | 	void *private; | ||||||
|  | 
 | ||||||
|  | 	struct ceph_options *options; | ||||||
|  | 
 | ||||||
|  | 	struct mutex mount_mutex;      /* serialize mount attempts */ | ||||||
|  | 	wait_queue_head_t auth_wq; | ||||||
|  | 	int auth_err; | ||||||
|  | 
 | ||||||
|  | 	int (*extra_mon_dispatch)(struct ceph_client *, struct ceph_msg *); | ||||||
|  | 
 | ||||||
|  | 	u32 supported_features; | ||||||
|  | 	u32 required_features; | ||||||
|  | 
 | ||||||
|  | 	struct ceph_messenger *msgr;   /* messenger instance */ | ||||||
|  | 	struct ceph_mon_client monc; | ||||||
|  | 	struct ceph_osd_client osdc; | ||||||
|  | 
 | ||||||
|  | #ifdef CONFIG_DEBUG_FS | ||||||
|  | 	struct dentry *debugfs_dir; | ||||||
|  | 	struct dentry *debugfs_monmap; | ||||||
|  | 	struct dentry *debugfs_osdmap; | ||||||
|  | #endif | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * snapshots | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * A "snap context" is the set of existing snapshots when we | ||||||
|  |  * write data.  It is used by the OSD to guide its COW behavior. | ||||||
|  |  * | ||||||
|  |  * The ceph_snap_context is refcounted, and attached to each dirty | ||||||
|  |  * page, indicating which context the dirty data belonged when it was | ||||||
|  |  * dirtied. | ||||||
|  |  */ | ||||||
|  | struct ceph_snap_context { | ||||||
|  | 	atomic_t nref; | ||||||
|  | 	u64 seq; | ||||||
|  | 	int num_snaps; | ||||||
|  | 	u64 snaps[]; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | static inline struct ceph_snap_context * | ||||||
|  | ceph_get_snap_context(struct ceph_snap_context *sc) | ||||||
|  | { | ||||||
|  | 	/*
 | ||||||
|  | 	printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), | ||||||
|  | 	       atomic_read(&sc->nref)+1); | ||||||
|  | 	*/ | ||||||
|  | 	if (sc) | ||||||
|  | 		atomic_inc(&sc->nref); | ||||||
|  | 	return sc; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline void ceph_put_snap_context(struct ceph_snap_context *sc) | ||||||
|  | { | ||||||
|  | 	if (!sc) | ||||||
|  | 		return; | ||||||
|  | 	/*
 | ||||||
|  | 	printk("put_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), | ||||||
|  | 	       atomic_read(&sc->nref)-1); | ||||||
|  | 	*/ | ||||||
|  | 	if (atomic_dec_and_test(&sc->nref)) { | ||||||
|  | 		/*printk(" deleting snap_context %p\n", sc);*/ | ||||||
|  | 		kfree(sc); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * calculate the number of pages a given length and offset map onto, | ||||||
|  |  * if we align the data. | ||||||
|  |  */ | ||||||
|  | static inline int calc_pages_for(u64 off, u64 len) | ||||||
|  | { | ||||||
|  | 	return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) - | ||||||
|  | 		(off >> PAGE_CACHE_SHIFT); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* ceph_common.c */ | ||||||
|  | extern const char *ceph_msg_type_name(int type); | ||||||
|  | extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); | ||||||
|  | extern struct kmem_cache *ceph_inode_cachep; | ||||||
|  | extern struct kmem_cache *ceph_cap_cachep; | ||||||
|  | extern struct kmem_cache *ceph_dentry_cachep; | ||||||
|  | extern struct kmem_cache *ceph_file_cachep; | ||||||
|  | 
 | ||||||
|  | extern int ceph_parse_options(struct ceph_options **popt, char *options, | ||||||
|  | 			      const char *dev_name, const char *dev_name_end, | ||||||
|  | 			      int (*parse_extra_token)(char *c, void *private), | ||||||
|  | 			      void *private); | ||||||
|  | extern void ceph_destroy_options(struct ceph_options *opt); | ||||||
|  | extern int ceph_compare_options(struct ceph_options *new_opt, | ||||||
|  | 				struct ceph_client *client); | ||||||
|  | extern struct ceph_client *ceph_create_client(struct ceph_options *opt, | ||||||
|  | 					      void *private); | ||||||
|  | extern u64 ceph_client_id(struct ceph_client *client); | ||||||
|  | extern void ceph_destroy_client(struct ceph_client *client); | ||||||
|  | extern int __ceph_open_session(struct ceph_client *client, | ||||||
|  | 			       unsigned long started); | ||||||
|  | extern int ceph_open_session(struct ceph_client *client); | ||||||
|  | 
 | ||||||
|  | /* pagevec.c */ | ||||||
|  | extern void ceph_release_page_vector(struct page **pages, int num_pages); | ||||||
|  | 
 | ||||||
|  | extern struct page **ceph_get_direct_page_vector(const char __user *data, | ||||||
|  | 					    int num_pages, | ||||||
|  | 					    loff_t off, size_t len); | ||||||
|  | extern void ceph_put_page_vector(struct page **pages, int num_pages); | ||||||
|  | extern void ceph_release_page_vector(struct page **pages, int num_pages); | ||||||
|  | extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); | ||||||
|  | extern int ceph_copy_user_to_page_vector(struct page **pages, | ||||||
|  | 					 const char __user *data, | ||||||
|  | 					 loff_t off, size_t len); | ||||||
|  | extern int ceph_copy_to_page_vector(struct page **pages, | ||||||
|  | 				    const char *data, | ||||||
|  | 				    loff_t off, size_t len); | ||||||
|  | extern int ceph_copy_from_page_vector(struct page **pages, | ||||||
|  | 				    char *data, | ||||||
|  | 				    loff_t off, size_t len); | ||||||
|  | extern int ceph_copy_page_vector_to_user(struct page **pages, char __user *data, | ||||||
|  | 				    loff_t off, size_t len); | ||||||
|  | extern void ceph_zero_page_vector_range(int off, int len, struct page **pages); | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #endif /* _FS_CEPH_LIBCEPH_H */ | ||||||
| @ -65,6 +65,9 @@ struct ceph_messenger { | |||||||
| 	 */ | 	 */ | ||||||
| 	u32 global_seq; | 	u32 global_seq; | ||||||
| 	spinlock_t global_seq_lock; | 	spinlock_t global_seq_lock; | ||||||
|  | 
 | ||||||
|  | 	u32 supported_features; | ||||||
|  | 	u32 required_features; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
| @ -82,6 +85,10 @@ struct ceph_msg { | |||||||
| 	struct ceph_pagelist *pagelist; /* instead of pages */ | 	struct ceph_pagelist *pagelist; /* instead of pages */ | ||||||
| 	struct list_head list_head; | 	struct list_head list_head; | ||||||
| 	struct kref kref; | 	struct kref kref; | ||||||
|  | 	struct bio  *bio;		/* instead of pages/pagelist */ | ||||||
|  | 	struct bio  *bio_iter;		/* bio iterator */ | ||||||
|  | 	int bio_seg;			/* current bio segment */ | ||||||
|  | 	struct ceph_pagelist *trail;	/* the trailing part of the data */ | ||||||
| 	bool front_is_vmalloc; | 	bool front_is_vmalloc; | ||||||
| 	bool more_to_follow; | 	bool more_to_follow; | ||||||
| 	bool needs_out_seq; | 	bool needs_out_seq; | ||||||
| @ -205,7 +212,7 @@ struct ceph_connection { | |||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| extern const char *pr_addr(const struct sockaddr_storage *ss); | extern const char *ceph_pr_addr(const struct sockaddr_storage *ss); | ||||||
| extern int ceph_parse_ips(const char *c, const char *end, | extern int ceph_parse_ips(const char *c, const char *end, | ||||||
| 			  struct ceph_entity_addr *addr, | 			  struct ceph_entity_addr *addr, | ||||||
| 			  int max_count, int *count); | 			  int max_count, int *count); | ||||||
| @ -216,7 +223,8 @@ extern void ceph_msgr_exit(void); | |||||||
| extern void ceph_msgr_flush(void); | extern void ceph_msgr_flush(void); | ||||||
| 
 | 
 | ||||||
| extern struct ceph_messenger *ceph_messenger_create( | extern struct ceph_messenger *ceph_messenger_create( | ||||||
| 	struct ceph_entity_addr *myaddr); | 	struct ceph_entity_addr *myaddr, | ||||||
|  | 	u32 features, u32 required); | ||||||
| extern void ceph_messenger_destroy(struct ceph_messenger *); | extern void ceph_messenger_destroy(struct ceph_messenger *); | ||||||
| 
 | 
 | ||||||
| extern void ceph_con_init(struct ceph_messenger *msgr, | extern void ceph_con_init(struct ceph_messenger *msgr, | ||||||
| @ -79,6 +79,7 @@ struct ceph_mon_client { | |||||||
| 	u64 last_tid; | 	u64 last_tid; | ||||||
| 
 | 
 | ||||||
| 	/* mds/osd map */ | 	/* mds/osd map */ | ||||||
|  | 	int want_mdsmap; | ||||||
| 	int want_next_osdmap; /* 1 = want, 2 = want+asked */ | 	int want_next_osdmap; /* 1 = want, 2 = want+asked */ | ||||||
| 	u32 have_osdmap, have_mdsmap; | 	u32 have_osdmap, have_mdsmap; | ||||||
| 
 | 
 | ||||||
| @ -15,6 +15,7 @@ struct ceph_snap_context; | |||||||
| struct ceph_osd_request; | struct ceph_osd_request; | ||||||
| struct ceph_osd_client; | struct ceph_osd_client; | ||||||
| struct ceph_authorizer; | struct ceph_authorizer; | ||||||
|  | struct ceph_pagelist; | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * completion callback for async writepages |  * completion callback for async writepages | ||||||
| @ -68,6 +69,7 @@ struct ceph_osd_request { | |||||||
| 	struct list_head  r_unsafe_item; | 	struct list_head  r_unsafe_item; | ||||||
| 
 | 
 | ||||||
| 	struct inode *r_inode;         	      /* for use by callbacks */ | 	struct inode *r_inode;         	      /* for use by callbacks */ | ||||||
|  | 	void *r_priv;			      /* ditto */ | ||||||
| 
 | 
 | ||||||
| 	char              r_oid[40];          /* object name */ | 	char              r_oid[40];          /* object name */ | ||||||
| 	int               r_oid_len; | 	int               r_oid_len; | ||||||
| @ -80,6 +82,11 @@ struct ceph_osd_request { | |||||||
| 	struct page     **r_pages;            /* pages for data payload */ | 	struct page     **r_pages;            /* pages for data payload */ | ||||||
| 	int               r_pages_from_pool; | 	int               r_pages_from_pool; | ||||||
| 	int               r_own_pages;        /* if true, i own page list */ | 	int               r_own_pages;        /* if true, i own page list */ | ||||||
|  | #ifdef CONFIG_BLOCK | ||||||
|  | 	struct bio       *r_bio;	      /* instead of pages */ | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | 	struct ceph_pagelist *r_trail;	      /* trailing part of the data */ | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct ceph_osd_client { | struct ceph_osd_client { | ||||||
| @ -110,6 +117,42 @@ struct ceph_osd_client { | |||||||
| 	struct ceph_msgpool	msgpool_op_reply; | 	struct ceph_msgpool	msgpool_op_reply; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | struct ceph_osd_req_op { | ||||||
|  | 	u16 op;           /* CEPH_OSD_OP_* */ | ||||||
|  | 	u32 flags;        /* CEPH_OSD_FLAG_* */ | ||||||
|  | 	union { | ||||||
|  | 		struct { | ||||||
|  | 			u64 offset, length; | ||||||
|  | 			u64 truncate_size; | ||||||
|  | 			u32 truncate_seq; | ||||||
|  | 		} extent; | ||||||
|  | 		struct { | ||||||
|  | 			const char *name; | ||||||
|  | 			u32 name_len; | ||||||
|  | 			const char  *val; | ||||||
|  | 			u32 value_len; | ||||||
|  | 			__u8 cmp_op;       /* CEPH_OSD_CMPXATTR_OP_* */ | ||||||
|  | 			__u8 cmp_mode;     /* CEPH_OSD_CMPXATTR_MODE_* */ | ||||||
|  | 		} xattr; | ||||||
|  | 		struct { | ||||||
|  | 			const char *class_name; | ||||||
|  | 			__u8 class_len; | ||||||
|  | 			const char *method_name; | ||||||
|  | 			__u8 method_len; | ||||||
|  | 			__u8 argc; | ||||||
|  | 			const char *indata; | ||||||
|  | 			u32 indata_len; | ||||||
|  | 		} cls; | ||||||
|  | 		struct { | ||||||
|  | 			u64 cookie, count; | ||||||
|  | 		} pgls; | ||||||
|  | 	        struct { | ||||||
|  | 		        u64 snapid; | ||||||
|  | 	        } snap; | ||||||
|  | 	}; | ||||||
|  | 	u32 payload_len; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| extern int ceph_osdc_init(struct ceph_osd_client *osdc, | extern int ceph_osdc_init(struct ceph_osd_client *osdc, | ||||||
| 			  struct ceph_client *client); | 			  struct ceph_client *client); | ||||||
| extern void ceph_osdc_stop(struct ceph_osd_client *osdc); | extern void ceph_osdc_stop(struct ceph_osd_client *osdc); | ||||||
| @ -119,6 +162,30 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, | |||||||
| extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, | extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, | ||||||
| 				 struct ceph_msg *msg); | 				 struct ceph_msg *msg); | ||||||
| 
 | 
 | ||||||
|  | extern void ceph_calc_raw_layout(struct ceph_osd_client *osdc, | ||||||
|  | 			struct ceph_file_layout *layout, | ||||||
|  | 			u64 snapid, | ||||||
|  | 			u64 off, u64 *plen, u64 *bno, | ||||||
|  | 			struct ceph_osd_request *req, | ||||||
|  | 			struct ceph_osd_req_op *op); | ||||||
|  | 
 | ||||||
|  | extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, | ||||||
|  | 					       int flags, | ||||||
|  | 					       struct ceph_snap_context *snapc, | ||||||
|  | 					       struct ceph_osd_req_op *ops, | ||||||
|  | 					       bool use_mempool, | ||||||
|  | 					       gfp_t gfp_flags, | ||||||
|  | 					       struct page **pages, | ||||||
|  | 					       struct bio *bio); | ||||||
|  | 
 | ||||||
|  | extern void ceph_osdc_build_request(struct ceph_osd_request *req, | ||||||
|  | 				    u64 off, u64 *plen, | ||||||
|  | 				    struct ceph_osd_req_op *src_ops, | ||||||
|  | 				    struct ceph_snap_context *snapc, | ||||||
|  | 				    struct timespec *mtime, | ||||||
|  | 				    const char *oid, | ||||||
|  | 				    int oid_len); | ||||||
|  | 
 | ||||||
| extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, | extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, | ||||||
| 				      struct ceph_file_layout *layout, | 				      struct ceph_file_layout *layout, | ||||||
| 				      struct ceph_vino vino, | 				      struct ceph_vino vino, | ||||||
| @ -4,7 +4,7 @@ | |||||||
| #include <linux/rbtree.h> | #include <linux/rbtree.h> | ||||||
| #include "types.h" | #include "types.h" | ||||||
| #include "ceph_fs.h" | #include "ceph_fs.h" | ||||||
| #include "crush/crush.h" | #include <linux/crush/crush.h> | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * The osd map describes the current membership of the osd cluster and |  * The osd map describes the current membership of the osd cluster and | ||||||
| @ -125,4 +125,6 @@ extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | |||||||
| extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, | extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, | ||||||
| 				struct ceph_pg pgid); | 				struct ceph_pg pgid); | ||||||
| 
 | 
 | ||||||
|  | extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name); | ||||||
|  | 
 | ||||||
| #endif | #endif | ||||||
| @ -8,6 +8,14 @@ struct ceph_pagelist { | |||||||
| 	void *mapped_tail; | 	void *mapped_tail; | ||||||
| 	size_t length; | 	size_t length; | ||||||
| 	size_t room; | 	size_t room; | ||||||
|  | 	struct list_head free_list; | ||||||
|  | 	size_t num_pages_free; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | struct ceph_pagelist_cursor { | ||||||
|  | 	struct ceph_pagelist *pl;   /* pagelist, for error checking */ | ||||||
|  | 	struct list_head *page_lru; /* page in list */ | ||||||
|  | 	size_t room;		    /* room remaining to reset to */ | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static inline void ceph_pagelist_init(struct ceph_pagelist *pl) | static inline void ceph_pagelist_init(struct ceph_pagelist *pl) | ||||||
| @ -16,10 +24,23 @@ static inline void ceph_pagelist_init(struct ceph_pagelist *pl) | |||||||
| 	pl->mapped_tail = NULL; | 	pl->mapped_tail = NULL; | ||||||
| 	pl->length = 0; | 	pl->length = 0; | ||||||
| 	pl->room = 0; | 	pl->room = 0; | ||||||
|  | 	INIT_LIST_HEAD(&pl->free_list); | ||||||
|  | 	pl->num_pages_free = 0; | ||||||
| } | } | ||||||
|  | 
 | ||||||
| extern int ceph_pagelist_release(struct ceph_pagelist *pl); | extern int ceph_pagelist_release(struct ceph_pagelist *pl); | ||||||
| 
 | 
 | ||||||
| extern int ceph_pagelist_append(struct ceph_pagelist *pl, void *d, size_t l); | extern int ceph_pagelist_append(struct ceph_pagelist *pl, const void *d, size_t l); | ||||||
|  | 
 | ||||||
|  | extern int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space); | ||||||
|  | 
 | ||||||
|  | extern int ceph_pagelist_free_reserve(struct ceph_pagelist *pl); | ||||||
|  | 
 | ||||||
|  | extern void ceph_pagelist_set_cursor(struct ceph_pagelist *pl, | ||||||
|  | 				     struct ceph_pagelist_cursor *c); | ||||||
|  | 
 | ||||||
|  | extern int ceph_pagelist_truncate(struct ceph_pagelist *pl, | ||||||
|  | 				  struct ceph_pagelist_cursor *c); | ||||||
| 
 | 
 | ||||||
| static inline int ceph_pagelist_encode_64(struct ceph_pagelist *pl, u64 v) | static inline int ceph_pagelist_encode_64(struct ceph_pagelist *pl, u64 v) | ||||||
| { | { | ||||||
| @ -293,6 +293,7 @@ source "net/wimax/Kconfig" | |||||||
| source "net/rfkill/Kconfig" | source "net/rfkill/Kconfig" | ||||||
| source "net/9p/Kconfig" | source "net/9p/Kconfig" | ||||||
| source "net/caif/Kconfig" | source "net/caif/Kconfig" | ||||||
|  | source "net/ceph/Kconfig" | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| endif   # if NET | endif   # if NET | ||||||
|  | |||||||
| @ -68,3 +68,4 @@ obj-$(CONFIG_SYSCTL)		+= sysctl_net.o | |||||||
| endif | endif | ||||||
| obj-$(CONFIG_WIMAX)		+= wimax/ | obj-$(CONFIG_WIMAX)		+= wimax/ | ||||||
| obj-$(CONFIG_DNS_RESOLVER)	+= dns_resolver/ | obj-$(CONFIG_DNS_RESOLVER)	+= dns_resolver/ | ||||||
|  | obj-$(CONFIG_CEPH_LIB)		+= ceph/ | ||||||
|  | |||||||
							
								
								
									
										28
									
								
								net/ceph/Kconfig
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								net/ceph/Kconfig
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,28 @@ | |||||||
|  | config CEPH_LIB | ||||||
|  |         tristate "Ceph core library (EXPERIMENTAL)" | ||||||
|  | 	depends on INET && EXPERIMENTAL | ||||||
|  | 	select LIBCRC32C | ||||||
|  | 	select CRYPTO_AES | ||||||
|  | 	select CRYPTO | ||||||
|  | 	default n | ||||||
|  | 	help | ||||||
|  | 	  Choose Y or M here to include libceph, which provides the | ||||||
|  | 	  functionality common to both the Ceph filesystem and the | ||||||
|  | 	  rados block device (rbd). | ||||||
|  | 
 | ||||||
|  | 	  More information at http://ceph.newdream.net/. | ||||||
|  | 
 | ||||||
|  | 	  If unsure, say N. | ||||||
|  | 
 | ||||||
|  | config CEPH_LIB_PRETTYDEBUG | ||||||
|  | 	bool "Include file:line in ceph debug output" | ||||||
|  | 	depends on CEPH_LIB | ||||||
|  | 	default n | ||||||
|  | 	help | ||||||
|  | 	  If you say Y here, debug output will include a filename and | ||||||
|  | 	  line to aid debugging.  This increases kernel size and slows | ||||||
|  | 	  execution slightly when debug call sites are enabled (e.g., | ||||||
|  | 	  via CONFIG_DYNAMIC_DEBUG). | ||||||
|  | 
 | ||||||
|  | 	  If unsure, say N. | ||||||
|  | 
 | ||||||
							
								
								
									
										37
									
								
								net/ceph/Makefile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								net/ceph/Makefile
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,37 @@ | |||||||
|  | #
 | ||||||
|  | # Makefile for the Ceph core library (libceph).
 | ||||||
|  | #
 | ||||||
|  | 
 | ||||||
|  | ifneq ($(KERNELRELEASE),) | ||||||
|  | 
 | ||||||
|  | obj-$(CONFIG_CEPH_LIB) += libceph.o | ||||||
|  | 
 | ||||||
|  | libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
 | ||||||
|  | 	mon_client.o \
 | ||||||
|  | 	osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
 | ||||||
|  | 	debugfs.o \
 | ||||||
|  | 	auth.o auth_none.o \
 | ||||||
|  | 	crypto.o armor.o \
 | ||||||
|  | 	auth_x.o \
 | ||||||
|  | 	ceph_fs.o ceph_strings.o ceph_hash.o \
 | ||||||
|  | 	pagevec.o | ||||||
|  | 
 | ||||||
|  | else | ||||||
|  | #Otherwise we were called directly from the command
 | ||||||
|  | # line; invoke the kernel build system.
 | ||||||
|  | 
 | ||||||
|  | KERNELDIR ?= /lib/modules/$(shell uname -r)/build | ||||||
|  | PWD := $(shell pwd) | ||||||
|  | 
 | ||||||
|  | default: all | ||||||
|  | 
 | ||||||
|  | all: | ||||||
|  | 	$(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules | ||||||
|  | 
 | ||||||
|  | modules_install: | ||||||
|  | 	$(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules_install | ||||||
|  | 
 | ||||||
|  | clean: | ||||||
|  | 	$(MAKE) -C $(KERNELDIR) M=$(PWD) clean | ||||||
|  | 
 | ||||||
|  | endif | ||||||
| @ -1,16 +1,16 @@ | |||||||
| #include "ceph_debug.h" | #include <linux/ceph/ceph_debug.h> | ||||||
| 
 | 
 | ||||||
| #include <linux/module.h> | #include <linux/module.h> | ||||||
| #include <linux/err.h> | #include <linux/err.h> | ||||||
| #include <linux/slab.h> | #include <linux/slab.h> | ||||||
| 
 | 
 | ||||||
| #include "types.h" | #include <linux/ceph/types.h> | ||||||
|  | #include <linux/ceph/decode.h> | ||||||
|  | #include <linux/ceph/libceph.h> | ||||||
|  | #include <linux/ceph/messenger.h> | ||||||
| #include "auth_none.h" | #include "auth_none.h" | ||||||
| #include "auth_x.h" | #include "auth_x.h" | ||||||
| #include "decode.h" |  | ||||||
| #include "super.h" |  | ||||||
| 
 | 
 | ||||||
| #include "messenger.h" |  | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * get protocol handler |  * get protocol handler | ||||||
| @ -1,14 +1,15 @@ | |||||||
| 
 | 
 | ||||||
| #include "ceph_debug.h" | #include <linux/ceph/ceph_debug.h> | ||||||
| 
 | 
 | ||||||
| #include <linux/err.h> | #include <linux/err.h> | ||||||
| #include <linux/module.h> | #include <linux/module.h> | ||||||
| #include <linux/random.h> | #include <linux/random.h> | ||||||
| #include <linux/slab.h> | #include <linux/slab.h> | ||||||
| 
 | 
 | ||||||
|  | #include <linux/ceph/decode.h> | ||||||
|  | #include <linux/ceph/auth.h> | ||||||
|  | 
 | ||||||
| #include "auth_none.h" | #include "auth_none.h" | ||||||
| #include "auth.h" |  | ||||||
| #include "decode.h" |  | ||||||
| 
 | 
 | ||||||
| static void reset(struct ceph_auth_client *ac) | static void reset(struct ceph_auth_client *ac) | ||||||
| { | { | ||||||
| @ -2,8 +2,7 @@ | |||||||
| #define _FS_CEPH_AUTH_NONE_H | #define _FS_CEPH_AUTH_NONE_H | ||||||
| 
 | 
 | ||||||
| #include <linux/slab.h> | #include <linux/slab.h> | ||||||
| 
 | #include <linux/ceph/auth.h> | ||||||
| #include "auth.h" |  | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * null security mode. |  * null security mode. | ||||||
| @ -1,16 +1,17 @@ | |||||||
| 
 | 
 | ||||||
| #include "ceph_debug.h" | #include <linux/ceph/ceph_debug.h> | ||||||
| 
 | 
 | ||||||
| #include <linux/err.h> | #include <linux/err.h> | ||||||
| #include <linux/module.h> | #include <linux/module.h> | ||||||
| #include <linux/random.h> | #include <linux/random.h> | ||||||
| #include <linux/slab.h> | #include <linux/slab.h> | ||||||
| 
 | 
 | ||||||
|  | #include <linux/ceph/decode.h> | ||||||
|  | #include <linux/ceph/auth.h> | ||||||
|  | 
 | ||||||
|  | #include "crypto.h" | ||||||
| #include "auth_x.h" | #include "auth_x.h" | ||||||
| #include "auth_x_protocol.h" | #include "auth_x_protocol.h" | ||||||
| #include "crypto.h" |  | ||||||
| #include "auth.h" |  | ||||||
| #include "decode.h" |  | ||||||
| 
 | 
 | ||||||
| #define TEMP_TICKET_BUF_LEN	256 | #define TEMP_TICKET_BUF_LEN	256 | ||||||
| 
 | 
 | ||||||
| @ -3,8 +3,9 @@ | |||||||
| 
 | 
 | ||||||
| #include <linux/rbtree.h> | #include <linux/rbtree.h> | ||||||
| 
 | 
 | ||||||
|  | #include <linux/ceph/auth.h> | ||||||
|  | 
 | ||||||
| #include "crypto.h" | #include "crypto.h" | ||||||
| #include "auth.h" |  | ||||||
| #include "auth_x_protocol.h" | #include "auth_x_protocol.h" | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
| @ -1,10 +1,11 @@ | |||||||
| 
 | 
 | ||||||
| #include "ceph_debug.h" | #include <linux/ceph/ceph_debug.h> | ||||||
| 
 | 
 | ||||||
|  | #include <linux/module.h> | ||||||
| #include <linux/slab.h> | #include <linux/slab.h> | ||||||
| 
 | 
 | ||||||
| #include "buffer.h" | #include <linux/ceph/buffer.h> | ||||||
| #include "decode.h" | #include <linux/ceph/decode.h> | ||||||
| 
 | 
 | ||||||
| struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp) | struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp) | ||||||
| { | { | ||||||
| @ -32,6 +33,7 @@ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp) | |||||||
| 	dout("buffer_new %p\n", b); | 	dout("buffer_new %p\n", b); | ||||||
| 	return b; | 	return b; | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_buffer_new); | ||||||
| 
 | 
 | ||||||
| void ceph_buffer_release(struct kref *kref) | void ceph_buffer_release(struct kref *kref) | ||||||
| { | { | ||||||
| @ -46,6 +48,7 @@ void ceph_buffer_release(struct kref *kref) | |||||||
| 	} | 	} | ||||||
| 	kfree(b); | 	kfree(b); | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_buffer_release); | ||||||
| 
 | 
 | ||||||
| int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end) | int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end) | ||||||
| { | { | ||||||
							
								
								
									
										529
									
								
								net/ceph/ceph_common.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										529
									
								
								net/ceph/ceph_common.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,529 @@ | |||||||
|  | 
 | ||||||
|  | #include <linux/ceph/ceph_debug.h> | ||||||
|  | #include <linux/backing-dev.h> | ||||||
|  | #include <linux/ctype.h> | ||||||
|  | #include <linux/fs.h> | ||||||
|  | #include <linux/inet.h> | ||||||
|  | #include <linux/in6.h> | ||||||
|  | #include <linux/module.h> | ||||||
|  | #include <linux/mount.h> | ||||||
|  | #include <linux/parser.h> | ||||||
|  | #include <linux/sched.h> | ||||||
|  | #include <linux/seq_file.h> | ||||||
|  | #include <linux/slab.h> | ||||||
|  | #include <linux/statfs.h> | ||||||
|  | #include <linux/string.h> | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #include <linux/ceph/libceph.h> | ||||||
|  | #include <linux/ceph/debugfs.h> | ||||||
|  | #include <linux/ceph/decode.h> | ||||||
|  | #include <linux/ceph/mon_client.h> | ||||||
|  | #include <linux/ceph/auth.h> | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
/*
 * Find the filename portion of a path (/foo/bar/baz -> baz).
 *
 * @s:   start of the path; only the first @len bytes are examined
 * @len: number of bytes of @s to consider
 *
 * Scans backwards from s + len and returns a pointer just past the last
 * '/' found within those bytes, or @s itself when there is none.  A NULL
 * @s or a non-positive @len returns @s unchanged, so the backwards scan
 * can never start in front of the buffer.
 */
const char *ceph_file_part(const char *s, int len)
{
	const char *e;

	if (!s || len <= 0)
		return s;

	for (e = s + len; e != s; e--) {
		if (e[-1] == '/')
			break;
	}
	return e;
}
|  | EXPORT_SYMBOL(ceph_file_part); | ||||||
|  | 
 | ||||||
|  | const char *ceph_msg_type_name(int type) | ||||||
|  | { | ||||||
|  | 	switch (type) { | ||||||
|  | 	case CEPH_MSG_SHUTDOWN: return "shutdown"; | ||||||
|  | 	case CEPH_MSG_PING: return "ping"; | ||||||
|  | 	case CEPH_MSG_AUTH: return "auth"; | ||||||
|  | 	case CEPH_MSG_AUTH_REPLY: return "auth_reply"; | ||||||
|  | 	case CEPH_MSG_MON_MAP: return "mon_map"; | ||||||
|  | 	case CEPH_MSG_MON_GET_MAP: return "mon_get_map"; | ||||||
|  | 	case CEPH_MSG_MON_SUBSCRIBE: return "mon_subscribe"; | ||||||
|  | 	case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack"; | ||||||
|  | 	case CEPH_MSG_STATFS: return "statfs"; | ||||||
|  | 	case CEPH_MSG_STATFS_REPLY: return "statfs_reply"; | ||||||
|  | 	case CEPH_MSG_MDS_MAP: return "mds_map"; | ||||||
|  | 	case CEPH_MSG_CLIENT_SESSION: return "client_session"; | ||||||
|  | 	case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect"; | ||||||
|  | 	case CEPH_MSG_CLIENT_REQUEST: return "client_request"; | ||||||
|  | 	case CEPH_MSG_CLIENT_REQUEST_FORWARD: return "client_request_forward"; | ||||||
|  | 	case CEPH_MSG_CLIENT_REPLY: return "client_reply"; | ||||||
|  | 	case CEPH_MSG_CLIENT_CAPS: return "client_caps"; | ||||||
|  | 	case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release"; | ||||||
|  | 	case CEPH_MSG_CLIENT_SNAP: return "client_snap"; | ||||||
|  | 	case CEPH_MSG_CLIENT_LEASE: return "client_lease"; | ||||||
|  | 	case CEPH_MSG_OSD_MAP: return "osd_map"; | ||||||
|  | 	case CEPH_MSG_OSD_OP: return "osd_op"; | ||||||
|  | 	case CEPH_MSG_OSD_OPREPLY: return "osd_opreply"; | ||||||
|  | 	default: return "unknown"; | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(ceph_msg_type_name); | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Initially learn our fsid, or verify an fsid matches. | ||||||
|  |  */ | ||||||
|  | int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) | ||||||
|  | { | ||||||
|  | 	if (client->have_fsid) { | ||||||
|  | 		if (ceph_fsid_compare(&client->fsid, fsid)) { | ||||||
|  | 			pr_err("bad fsid, had %pU got %pU", | ||||||
|  | 			       &client->fsid, fsid); | ||||||
|  | 			return -1; | ||||||
|  | 		} | ||||||
|  | 	} else { | ||||||
|  | 		pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid); | ||||||
|  | 		memcpy(&client->fsid, fsid, sizeof(*fsid)); | ||||||
|  | 		ceph_debugfs_client_init(client); | ||||||
|  | 		client->have_fsid = true; | ||||||
|  | 	} | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(ceph_check_fsid); | ||||||
|  | 
 | ||||||
/*
 * strcmp() variant that accepts NULL arguments.  Two NULLs compare
 * equal; a non-NULL @s1 against a NULL @s2 gives -1, the reverse gives 1.
 */
static int strcmp_null(const char *s1, const char *s2)
{
	if (s1 && s2)
		return strcmp(s1, s2);
	if (s1)
		return -1;
	return s2 ? 1 : 0;
}
|  | 
 | ||||||
|  | int ceph_compare_options(struct ceph_options *new_opt, | ||||||
|  | 			 struct ceph_client *client) | ||||||
|  | { | ||||||
|  | 	struct ceph_options *opt1 = new_opt; | ||||||
|  | 	struct ceph_options *opt2 = client->options; | ||||||
|  | 	int ofs = offsetof(struct ceph_options, mon_addr); | ||||||
|  | 	int i; | ||||||
|  | 	int ret; | ||||||
|  | 
 | ||||||
|  | 	ret = memcmp(opt1, opt2, ofs); | ||||||
|  | 	if (ret) | ||||||
|  | 		return ret; | ||||||
|  | 
 | ||||||
|  | 	ret = strcmp_null(opt1->name, opt2->name); | ||||||
|  | 	if (ret) | ||||||
|  | 		return ret; | ||||||
|  | 
 | ||||||
|  | 	ret = strcmp_null(opt1->secret, opt2->secret); | ||||||
|  | 	if (ret) | ||||||
|  | 		return ret; | ||||||
|  | 
 | ||||||
|  | 	/* any matching mon ip implies a match */ | ||||||
|  | 	for (i = 0; i < opt1->num_mon; i++) { | ||||||
|  | 		if (ceph_monmap_contains(client->monc.monmap, | ||||||
|  | 				 &opt1->mon_addr[i])) | ||||||
|  | 			return 0; | ||||||
|  | 	} | ||||||
|  | 	return -1; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(ceph_compare_options); | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | static int parse_fsid(const char *str, struct ceph_fsid *fsid) | ||||||
|  | { | ||||||
|  | 	int i = 0; | ||||||
|  | 	char tmp[3]; | ||||||
|  | 	int err = -EINVAL; | ||||||
|  | 	int d; | ||||||
|  | 
 | ||||||
|  | 	dout("parse_fsid '%s'\n", str); | ||||||
|  | 	tmp[2] = 0; | ||||||
|  | 	while (*str && i < 16) { | ||||||
|  | 		if (ispunct(*str)) { | ||||||
|  | 			str++; | ||||||
|  | 			continue; | ||||||
|  | 		} | ||||||
|  | 		if (!isxdigit(str[0]) || !isxdigit(str[1])) | ||||||
|  | 			break; | ||||||
|  | 		tmp[0] = str[0]; | ||||||
|  | 		tmp[1] = str[1]; | ||||||
|  | 		if (sscanf(tmp, "%x", &d) < 1) | ||||||
|  | 			break; | ||||||
|  | 		fsid->fsid[i] = d & 0xff; | ||||||
|  | 		i++; | ||||||
|  | 		str += 2; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if (i == 16) | ||||||
|  | 		err = 0; | ||||||
|  | 	dout("parse_fsid ret %d got fsid %pU", err, fsid); | ||||||
|  | 	return err; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * ceph options | ||||||
|  |  */ | ||||||
|  | enum { | ||||||
|  | 	Opt_osdtimeout, | ||||||
|  | 	Opt_osdkeepalivetimeout, | ||||||
|  | 	Opt_mount_timeout, | ||||||
|  | 	Opt_osd_idle_ttl, | ||||||
|  | 	Opt_last_int, | ||||||
|  | 	/* int args above */ | ||||||
|  | 	Opt_fsid, | ||||||
|  | 	Opt_name, | ||||||
|  | 	Opt_secret, | ||||||
|  | 	Opt_ip, | ||||||
|  | 	Opt_last_string, | ||||||
|  | 	/* string args above */ | ||||||
|  | 	Opt_noshare, | ||||||
|  | 	Opt_nocrc, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | static match_table_t opt_tokens = { | ||||||
|  | 	{Opt_osdtimeout, "osdtimeout=%d"}, | ||||||
|  | 	{Opt_osdkeepalivetimeout, "osdkeepalive=%d"}, | ||||||
|  | 	{Opt_mount_timeout, "mount_timeout=%d"}, | ||||||
|  | 	{Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, | ||||||
|  | 	/* int args above */ | ||||||
|  | 	{Opt_fsid, "fsid=%s"}, | ||||||
|  | 	{Opt_name, "name=%s"}, | ||||||
|  | 	{Opt_secret, "secret=%s"}, | ||||||
|  | 	{Opt_ip, "ip=%s"}, | ||||||
|  | 	/* string args above */ | ||||||
|  | 	{Opt_noshare, "noshare"}, | ||||||
|  | 	{Opt_nocrc, "nocrc"}, | ||||||
|  | 	{-1, NULL} | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | void ceph_destroy_options(struct ceph_options *opt) | ||||||
|  | { | ||||||
|  | 	dout("destroy_options %p\n", opt); | ||||||
|  | 	kfree(opt->name); | ||||||
|  | 	kfree(opt->secret); | ||||||
|  | 	kfree(opt); | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(ceph_destroy_options); | ||||||
|  | 
 | ||||||
|  | int ceph_parse_options(struct ceph_options **popt, char *options, | ||||||
|  | 		       const char *dev_name, const char *dev_name_end, | ||||||
|  | 		       int (*parse_extra_token)(char *c, void *private), | ||||||
|  | 		       void *private) | ||||||
|  | { | ||||||
|  | 	struct ceph_options *opt; | ||||||
|  | 	const char *c; | ||||||
|  | 	int err = -ENOMEM; | ||||||
|  | 	substring_t argstr[MAX_OPT_ARGS]; | ||||||
|  | 
 | ||||||
|  | 	opt = kzalloc(sizeof(*opt), GFP_KERNEL); | ||||||
|  | 	if (!opt) | ||||||
|  | 		return err; | ||||||
|  | 	opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr), | ||||||
|  | 				GFP_KERNEL); | ||||||
|  | 	if (!opt->mon_addr) | ||||||
|  | 		goto out; | ||||||
|  | 
 | ||||||
|  | 	dout("parse_options %p options '%s' dev_name '%s'\n", opt, options, | ||||||
|  | 	     dev_name); | ||||||
|  | 
 | ||||||
|  | 	/* start with defaults */ | ||||||
|  | 	opt->flags = CEPH_OPT_DEFAULT; | ||||||
|  | 	opt->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT; | ||||||
|  | 	opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; | ||||||
|  | 	opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ | ||||||
|  | 	opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;   /* seconds */ | ||||||
|  | 
 | ||||||
|  | 	/* get mon ip(s) */ | ||||||
|  | 	/* ip1[:port1][,ip2[:port2]...] */ | ||||||
|  | 	err = ceph_parse_ips(dev_name, dev_name_end, opt->mon_addr, | ||||||
|  | 			     CEPH_MAX_MON, &opt->num_mon); | ||||||
|  | 	if (err < 0) | ||||||
|  | 		goto out; | ||||||
|  | 
 | ||||||
|  | 	/* parse mount options */ | ||||||
|  | 	while ((c = strsep(&options, ",")) != NULL) { | ||||||
|  | 		int token, intval, ret; | ||||||
|  | 		if (!*c) | ||||||
|  | 			continue; | ||||||
|  | 		err = -EINVAL; | ||||||
|  | 		token = match_token((char *)c, opt_tokens, argstr); | ||||||
|  | 		if (token < 0 && parse_extra_token) { | ||||||
|  | 			/* extra? */ | ||||||
|  | 			err = parse_extra_token((char *)c, private); | ||||||
|  | 			if (err < 0) { | ||||||
|  | 				pr_err("bad option at '%s'\n", c); | ||||||
|  | 				goto out; | ||||||
|  | 			} | ||||||
|  | 			continue; | ||||||
|  | 		} | ||||||
|  | 		if (token < Opt_last_int) { | ||||||
|  | 			ret = match_int(&argstr[0], &intval); | ||||||
|  | 			if (ret < 0) { | ||||||
|  | 				pr_err("bad mount option arg (not int) " | ||||||
|  | 				       "at '%s'\n", c); | ||||||
|  | 				continue; | ||||||
|  | 			} | ||||||
|  | 			dout("got int token %d val %d\n", token, intval); | ||||||
|  | 		} else if (token > Opt_last_int && token < Opt_last_string) { | ||||||
|  | 			dout("got string token %d val %s\n", token, | ||||||
|  | 			     argstr[0].from); | ||||||
|  | 		} else { | ||||||
|  | 			dout("got token %d\n", token); | ||||||
|  | 		} | ||||||
|  | 		switch (token) { | ||||||
|  | 		case Opt_ip: | ||||||
|  | 			err = ceph_parse_ips(argstr[0].from, | ||||||
|  | 					     argstr[0].to, | ||||||
|  | 					     &opt->my_addr, | ||||||
|  | 					     1, NULL); | ||||||
|  | 			if (err < 0) | ||||||
|  | 				goto out; | ||||||
|  | 			opt->flags |= CEPH_OPT_MYIP; | ||||||
|  | 			break; | ||||||
|  | 
 | ||||||
|  | 		case Opt_fsid: | ||||||
|  | 			err = parse_fsid(argstr[0].from, &opt->fsid); | ||||||
|  | 			if (err == 0) | ||||||
|  | 				opt->flags |= CEPH_OPT_FSID; | ||||||
|  | 			break; | ||||||
|  | 		case Opt_name: | ||||||
|  | 			opt->name = kstrndup(argstr[0].from, | ||||||
|  | 					      argstr[0].to-argstr[0].from, | ||||||
|  | 					      GFP_KERNEL); | ||||||
|  | 			break; | ||||||
|  | 		case Opt_secret: | ||||||
|  | 			opt->secret = kstrndup(argstr[0].from, | ||||||
|  | 						argstr[0].to-argstr[0].from, | ||||||
|  | 						GFP_KERNEL); | ||||||
|  | 			break; | ||||||
|  | 
 | ||||||
|  | 			/* misc */ | ||||||
|  | 		case Opt_osdtimeout: | ||||||
|  | 			opt->osd_timeout = intval; | ||||||
|  | 			break; | ||||||
|  | 		case Opt_osdkeepalivetimeout: | ||||||
|  | 			opt->osd_keepalive_timeout = intval; | ||||||
|  | 			break; | ||||||
|  | 		case Opt_osd_idle_ttl: | ||||||
|  | 			opt->osd_idle_ttl = intval; | ||||||
|  | 			break; | ||||||
|  | 		case Opt_mount_timeout: | ||||||
|  | 			opt->mount_timeout = intval; | ||||||
|  | 			break; | ||||||
|  | 
 | ||||||
|  | 		case Opt_noshare: | ||||||
|  | 			opt->flags |= CEPH_OPT_NOSHARE; | ||||||
|  | 			break; | ||||||
|  | 
 | ||||||
|  | 		case Opt_nocrc: | ||||||
|  | 			opt->flags |= CEPH_OPT_NOCRC; | ||||||
|  | 			break; | ||||||
|  | 
 | ||||||
|  | 		default: | ||||||
|  | 			BUG_ON(token); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	/* success */ | ||||||
|  | 	*popt = opt; | ||||||
|  | 	return 0; | ||||||
|  | 
 | ||||||
|  | out: | ||||||
|  | 	ceph_destroy_options(opt); | ||||||
|  | 	return err; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(ceph_parse_options); | ||||||
|  | 
 | ||||||
|  | u64 ceph_client_id(struct ceph_client *client) | ||||||
|  | { | ||||||
|  | 	return client->monc.auth->global_id; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(ceph_client_id); | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * create a fresh client instance | ||||||
|  |  */ | ||||||
|  | struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private) | ||||||
|  | { | ||||||
|  | 	struct ceph_client *client; | ||||||
|  | 	int err = -ENOMEM; | ||||||
|  | 
 | ||||||
|  | 	client = kzalloc(sizeof(*client), GFP_KERNEL); | ||||||
|  | 	if (client == NULL) | ||||||
|  | 		return ERR_PTR(-ENOMEM); | ||||||
|  | 
 | ||||||
|  | 	client->private = private; | ||||||
|  | 	client->options = opt; | ||||||
|  | 
 | ||||||
|  | 	mutex_init(&client->mount_mutex); | ||||||
|  | 	init_waitqueue_head(&client->auth_wq); | ||||||
|  | 	client->auth_err = 0; | ||||||
|  | 
 | ||||||
|  | 	client->extra_mon_dispatch = NULL; | ||||||
|  | 	client->supported_features = CEPH_FEATURE_SUPPORTED_DEFAULT; | ||||||
|  | 	client->required_features = CEPH_FEATURE_REQUIRED_DEFAULT; | ||||||
|  | 
 | ||||||
|  | 	client->msgr = NULL; | ||||||
|  | 
 | ||||||
|  | 	/* subsystems */ | ||||||
|  | 	err = ceph_monc_init(&client->monc, client); | ||||||
|  | 	if (err < 0) | ||||||
|  | 		goto fail; | ||||||
|  | 	err = ceph_osdc_init(&client->osdc, client); | ||||||
|  | 	if (err < 0) | ||||||
|  | 		goto fail_monc; | ||||||
|  | 
 | ||||||
|  | 	return client; | ||||||
|  | 
 | ||||||
|  | fail_monc: | ||||||
|  | 	ceph_monc_stop(&client->monc); | ||||||
|  | fail: | ||||||
|  | 	kfree(client); | ||||||
|  | 	return ERR_PTR(err); | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(ceph_create_client); | ||||||
|  | 
 | ||||||
|  | void ceph_destroy_client(struct ceph_client *client) | ||||||
|  | { | ||||||
|  | 	dout("destroy_client %p\n", client); | ||||||
|  | 
 | ||||||
|  | 	/* unmount */ | ||||||
|  | 	ceph_osdc_stop(&client->osdc); | ||||||
|  | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * make sure mds and osd connections close out before destroying | ||||||
|  | 	 * the auth module, which is needed to free those connections' | ||||||
|  | 	 * ceph_authorizers. | ||||||
|  | 	 */ | ||||||
|  | 	ceph_msgr_flush(); | ||||||
|  | 
 | ||||||
|  | 	ceph_monc_stop(&client->monc); | ||||||
|  | 
 | ||||||
|  | 	ceph_debugfs_client_cleanup(client); | ||||||
|  | 
 | ||||||
|  | 	if (client->msgr) | ||||||
|  | 		ceph_messenger_destroy(client->msgr); | ||||||
|  | 
 | ||||||
|  | 	ceph_destroy_options(client->options); | ||||||
|  | 
 | ||||||
|  | 	kfree(client); | ||||||
|  | 	dout("destroy_client %p done\n", client); | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(ceph_destroy_client); | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * true if we have the mon map (and have thus joined the cluster) | ||||||
|  |  */ | ||||||
|  | static int have_mon_and_osd_map(struct ceph_client *client) | ||||||
|  | { | ||||||
|  | 	return client->monc.monmap && client->monc.monmap->epoch && | ||||||
|  | 	       client->osdc.osdmap && client->osdc.osdmap->epoch; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * mount: join the ceph cluster, and open root directory. | ||||||
|  |  */ | ||||||
|  | int __ceph_open_session(struct ceph_client *client, unsigned long started) | ||||||
|  | { | ||||||
|  | 	struct ceph_entity_addr *myaddr = NULL; | ||||||
|  | 	int err; | ||||||
|  | 	unsigned long timeout = client->options->mount_timeout * HZ; | ||||||
|  | 
 | ||||||
|  | 	/* initialize the messenger */ | ||||||
|  | 	if (client->msgr == NULL) { | ||||||
|  | 		if (ceph_test_opt(client, MYIP)) | ||||||
|  | 			myaddr = &client->options->my_addr; | ||||||
|  | 		client->msgr = ceph_messenger_create(myaddr, | ||||||
|  | 					client->supported_features, | ||||||
|  | 					client->required_features); | ||||||
|  | 		if (IS_ERR(client->msgr)) { | ||||||
|  | 			client->msgr = NULL; | ||||||
|  | 			return PTR_ERR(client->msgr); | ||||||
|  | 		} | ||||||
|  | 		client->msgr->nocrc = ceph_test_opt(client, NOCRC); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	/* open session, and wait for mon and osd maps */ | ||||||
|  | 	err = ceph_monc_open_session(&client->monc); | ||||||
|  | 	if (err < 0) | ||||||
|  | 		return err; | ||||||
|  | 
 | ||||||
|  | 	while (!have_mon_and_osd_map(client)) { | ||||||
|  | 		err = -EIO; | ||||||
|  | 		if (timeout && time_after_eq(jiffies, started + timeout)) | ||||||
|  | 			return err; | ||||||
|  | 
 | ||||||
|  | 		/* wait */ | ||||||
|  | 		dout("mount waiting for mon_map\n"); | ||||||
|  | 		err = wait_event_interruptible_timeout(client->auth_wq, | ||||||
|  | 			have_mon_and_osd_map(client) || (client->auth_err < 0), | ||||||
|  | 			timeout); | ||||||
|  | 		if (err == -EINTR || err == -ERESTARTSYS) | ||||||
|  | 			return err; | ||||||
|  | 		if (client->auth_err < 0) | ||||||
|  | 			return client->auth_err; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(__ceph_open_session); | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | int ceph_open_session(struct ceph_client *client) | ||||||
|  | { | ||||||
|  | 	int ret; | ||||||
|  | 	unsigned long started = jiffies;  /* note the start time */ | ||||||
|  | 
 | ||||||
|  | 	dout("open_session start\n"); | ||||||
|  | 	mutex_lock(&client->mount_mutex); | ||||||
|  | 
 | ||||||
|  | 	ret = __ceph_open_session(client, started); | ||||||
|  | 
 | ||||||
|  | 	mutex_unlock(&client->mount_mutex); | ||||||
|  | 	return ret; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(ceph_open_session); | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | static int __init init_ceph_lib(void) | ||||||
|  | { | ||||||
|  | 	int ret = 0; | ||||||
|  | 
 | ||||||
|  | 	ret = ceph_debugfs_init(); | ||||||
|  | 	if (ret < 0) | ||||||
|  | 		goto out; | ||||||
|  | 
 | ||||||
|  | 	ret = ceph_msgr_init(); | ||||||
|  | 	if (ret < 0) | ||||||
|  | 		goto out_debugfs; | ||||||
|  | 
 | ||||||
|  | 	pr_info("loaded (mon/osd proto %d/%d, osdmap %d/%d %d/%d)\n", | ||||||
|  | 		CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL, | ||||||
|  | 		CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT, | ||||||
|  | 		CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT); | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | 
 | ||||||
|  | out_debugfs: | ||||||
|  | 	ceph_debugfs_cleanup(); | ||||||
|  | out: | ||||||
|  | 	return ret; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void __exit exit_ceph_lib(void) | ||||||
|  | { | ||||||
|  | 	dout("exit_ceph_lib\n"); | ||||||
|  | 	ceph_msgr_exit(); | ||||||
|  | 	ceph_debugfs_cleanup(); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | module_init(init_ceph_lib); | ||||||
|  | module_exit(exit_ceph_lib); | ||||||
|  | 
 | ||||||
|  | MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); | ||||||
|  | MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); | ||||||
|  | MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); | ||||||
|  | MODULE_DESCRIPTION("Ceph filesystem for Linux"); | ||||||
|  | MODULE_LICENSE("GPL"); | ||||||
| @ -1,7 +1,8 @@ | |||||||
| /*
 | /*
 | ||||||
|  * Some non-inline ceph helpers |  * Some non-inline ceph helpers | ||||||
|  */ |  */ | ||||||
| #include "types.h" | #include <linux/module.h> | ||||||
|  | #include <linux/ceph/types.h> | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * return true if @layout appears to be valid |  * return true if @layout appears to be valid | ||||||
| @ -52,6 +53,7 @@ int ceph_flags_to_mode(int flags) | |||||||
| 
 | 
 | ||||||
| 	return mode; | 	return mode; | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_flags_to_mode); | ||||||
| 
 | 
 | ||||||
| int ceph_caps_for_mode(int mode) | int ceph_caps_for_mode(int mode) | ||||||
| { | { | ||||||
| @ -70,3 +72,4 @@ int ceph_caps_for_mode(int mode) | |||||||
| 
 | 
 | ||||||
| 	return caps; | 	return caps; | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_caps_for_mode); | ||||||
| @ -1,5 +1,5 @@ | |||||||
| 
 | 
 | ||||||
| #include "types.h" | #include <linux/ceph/types.h> | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Robert Jenkin's hash function. |  * Robert Jenkin's hash function. | ||||||
							
								
								
									
										84
									
								
								net/ceph/ceph_strings.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										84
									
								
								net/ceph/ceph_strings.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,84 @@ | |||||||
|  | /*
 | ||||||
|  |  * Ceph string constants | ||||||
|  |  */ | ||||||
|  | #include <linux/module.h> | ||||||
|  | #include <linux/ceph/types.h> | ||||||
|  | 
 | ||||||
|  | const char *ceph_entity_type_name(int type) | ||||||
|  | { | ||||||
|  | 	switch (type) { | ||||||
|  | 	case CEPH_ENTITY_TYPE_MDS: return "mds"; | ||||||
|  | 	case CEPH_ENTITY_TYPE_OSD: return "osd"; | ||||||
|  | 	case CEPH_ENTITY_TYPE_MON: return "mon"; | ||||||
|  | 	case CEPH_ENTITY_TYPE_CLIENT: return "client"; | ||||||
|  | 	case CEPH_ENTITY_TYPE_AUTH: return "auth"; | ||||||
|  | 	default: return "unknown"; | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | const char *ceph_osd_op_name(int op) | ||||||
|  | { | ||||||
|  | 	switch (op) { | ||||||
|  | 	case CEPH_OSD_OP_READ: return "read"; | ||||||
|  | 	case CEPH_OSD_OP_STAT: return "stat"; | ||||||
|  | 
 | ||||||
|  | 	case CEPH_OSD_OP_MASKTRUNC: return "masktrunc"; | ||||||
|  | 
 | ||||||
|  | 	case CEPH_OSD_OP_WRITE: return "write"; | ||||||
|  | 	case CEPH_OSD_OP_DELETE: return "delete"; | ||||||
|  | 	case CEPH_OSD_OP_TRUNCATE: return "truncate"; | ||||||
|  | 	case CEPH_OSD_OP_ZERO: return "zero"; | ||||||
|  | 	case CEPH_OSD_OP_WRITEFULL: return "writefull"; | ||||||
|  | 	case CEPH_OSD_OP_ROLLBACK: return "rollback"; | ||||||
|  | 
 | ||||||
|  | 	case CEPH_OSD_OP_APPEND: return "append"; | ||||||
|  | 	case CEPH_OSD_OP_STARTSYNC: return "startsync"; | ||||||
|  | 	case CEPH_OSD_OP_SETTRUNC: return "settrunc"; | ||||||
|  | 	case CEPH_OSD_OP_TRIMTRUNC: return "trimtrunc"; | ||||||
|  | 
 | ||||||
|  | 	case CEPH_OSD_OP_TMAPUP: return "tmapup"; | ||||||
|  | 	case CEPH_OSD_OP_TMAPGET: return "tmapget"; | ||||||
|  | 	case CEPH_OSD_OP_TMAPPUT: return "tmapput"; | ||||||
|  | 
 | ||||||
|  | 	case CEPH_OSD_OP_GETXATTR: return "getxattr"; | ||||||
|  | 	case CEPH_OSD_OP_GETXATTRS: return "getxattrs"; | ||||||
|  | 	case CEPH_OSD_OP_SETXATTR: return "setxattr"; | ||||||
|  | 	case CEPH_OSD_OP_SETXATTRS: return "setxattrs"; | ||||||
|  | 	case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs"; | ||||||
|  | 	case CEPH_OSD_OP_RMXATTR: return "rmxattr"; | ||||||
|  | 	case CEPH_OSD_OP_CMPXATTR: return "cmpxattr"; | ||||||
|  | 
 | ||||||
|  | 	case CEPH_OSD_OP_PULL: return "pull"; | ||||||
|  | 	case CEPH_OSD_OP_PUSH: return "push"; | ||||||
|  | 	case CEPH_OSD_OP_BALANCEREADS: return "balance-reads"; | ||||||
|  | 	case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads"; | ||||||
|  | 	case CEPH_OSD_OP_SCRUB: return "scrub"; | ||||||
|  | 
 | ||||||
|  | 	case CEPH_OSD_OP_WRLOCK: return "wrlock"; | ||||||
|  | 	case CEPH_OSD_OP_WRUNLOCK: return "wrunlock"; | ||||||
|  | 	case CEPH_OSD_OP_RDLOCK: return "rdlock"; | ||||||
|  | 	case CEPH_OSD_OP_RDUNLOCK: return "rdunlock"; | ||||||
|  | 	case CEPH_OSD_OP_UPLOCK: return "uplock"; | ||||||
|  | 	case CEPH_OSD_OP_DNLOCK: return "dnlock"; | ||||||
|  | 
 | ||||||
|  | 	case CEPH_OSD_OP_CALL: return "call"; | ||||||
|  | 
 | ||||||
|  | 	case CEPH_OSD_OP_PGLS: return "pgls"; | ||||||
|  | 	} | ||||||
|  | 	return "???"; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | const char *ceph_pool_op_name(int op) | ||||||
|  | { | ||||||
|  | 	switch (op) { | ||||||
|  | 	case POOL_OP_CREATE: return "create"; | ||||||
|  | 	case POOL_OP_DELETE: return "delete"; | ||||||
|  | 	case POOL_OP_AUID_CHANGE: return "auid change"; | ||||||
|  | 	case POOL_OP_CREATE_SNAP: return "create snap"; | ||||||
|  | 	case POOL_OP_DELETE_SNAP: return "delete snap"; | ||||||
|  | 	case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap"; | ||||||
|  | 	case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap"; | ||||||
|  | 	} | ||||||
|  | 	return "???"; | ||||||
|  | } | ||||||
| @ -8,7 +8,7 @@ | |||||||
| # define BUG_ON(x) assert(!(x)) | # define BUG_ON(x) assert(!(x)) | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| #include "crush.h" | #include <linux/crush/crush.h> | ||||||
| 
 | 
 | ||||||
| const char *crush_bucket_alg_name(int alg) | const char *crush_bucket_alg_name(int alg) | ||||||
| { | { | ||||||
| @ -1,6 +1,6 @@ | |||||||
| 
 | 
 | ||||||
| #include <linux/types.h> | #include <linux/types.h> | ||||||
| #include "hash.h" | #include <linux/crush/hash.h> | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Robert Jenkins' function for mixing 32-bit values |  * Robert Jenkins' function for mixing 32-bit values | ||||||
| @ -18,8 +18,8 @@ | |||||||
| # define kfree(x) free(x) | # define kfree(x) free(x) | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| #include "crush.h" | #include <linux/crush/crush.h> | ||||||
| #include "hash.h" | #include <linux/crush/hash.h> | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Implement the core CRUSH mapping algorithm. |  * Implement the core CRUSH mapping algorithm. | ||||||
| @ -1,13 +1,13 @@ | |||||||
| 
 | 
 | ||||||
| #include "ceph_debug.h" | #include <linux/ceph/ceph_debug.h> | ||||||
| 
 | 
 | ||||||
| #include <linux/err.h> | #include <linux/err.h> | ||||||
| #include <linux/scatterlist.h> | #include <linux/scatterlist.h> | ||||||
| #include <linux/slab.h> | #include <linux/slab.h> | ||||||
| #include <crypto/hash.h> | #include <crypto/hash.h> | ||||||
| 
 | 
 | ||||||
|  | #include <linux/ceph/decode.h> | ||||||
| #include "crypto.h" | #include "crypto.h" | ||||||
| #include "decode.h" |  | ||||||
| 
 | 
 | ||||||
| int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end) | int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end) | ||||||
| { | { | ||||||
| @ -1,8 +1,8 @@ | |||||||
| #ifndef _FS_CEPH_CRYPTO_H | #ifndef _FS_CEPH_CRYPTO_H | ||||||
| #define _FS_CEPH_CRYPTO_H | #define _FS_CEPH_CRYPTO_H | ||||||
| 
 | 
 | ||||||
| #include "types.h" | #include <linux/ceph/types.h> | ||||||
| #include "buffer.h" | #include <linux/ceph/buffer.h> | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * cryptographic secret |  * cryptographic secret | ||||||
							
								
								
									
										267
									
								
								net/ceph/debugfs.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										267
									
								
								net/ceph/debugfs.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,267 @@ | |||||||
|  | #include <linux/ceph/ceph_debug.h> | ||||||
|  | 
 | ||||||
|  | #include <linux/device.h> | ||||||
|  | #include <linux/slab.h> | ||||||
|  | #include <linux/module.h> | ||||||
|  | #include <linux/ctype.h> | ||||||
|  | #include <linux/debugfs.h> | ||||||
|  | #include <linux/seq_file.h> | ||||||
|  | 
 | ||||||
|  | #include <linux/ceph/libceph.h> | ||||||
|  | #include <linux/ceph/mon_client.h> | ||||||
|  | #include <linux/ceph/auth.h> | ||||||
|  | #include <linux/ceph/debugfs.h> | ||||||
|  | 
 | ||||||
|  | #ifdef CONFIG_DEBUG_FS | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Implement /sys/kernel/debug/ceph fun | ||||||
|  |  * | ||||||
|  |  * /sys/kernel/debug/ceph/client*  - an instance of the ceph client | ||||||
|  |  *      .../osdmap      - current osdmap | ||||||
|  |  *      .../monmap      - current monmap | ||||||
|  |  *      .../osdc        - active osd requests | ||||||
|  |  *      .../monc        - mon client state | ||||||
|  |  *      .../dentry_lru  - dump contents of dentry lru | ||||||
|  |  *      .../caps        - expose cap (reservation) stats | ||||||
|  |  *      .../bdi         - symlink to ../../bdi/something | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | static struct dentry *ceph_debugfs_dir; | ||||||
|  | 
 | ||||||
|  | static int monmap_show(struct seq_file *s, void *p) | ||||||
|  | { | ||||||
|  | 	int i; | ||||||
|  | 	struct ceph_client *client = s->private; | ||||||
|  | 
 | ||||||
|  | 	if (client->monc.monmap == NULL) | ||||||
|  | 		return 0; | ||||||
|  | 
 | ||||||
|  | 	seq_printf(s, "epoch %d\n", client->monc.monmap->epoch); | ||||||
|  | 	for (i = 0; i < client->monc.monmap->num_mon; i++) { | ||||||
|  | 		struct ceph_entity_inst *inst = | ||||||
|  | 			&client->monc.monmap->mon_inst[i]; | ||||||
|  | 
 | ||||||
|  | 		seq_printf(s, "\t%s%lld\t%s\n", | ||||||
|  | 			   ENTITY_NAME(inst->name), | ||||||
|  | 			   ceph_pr_addr(&inst->addr.in_addr)); | ||||||
|  | 	} | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static int osdmap_show(struct seq_file *s, void *p) | ||||||
|  | { | ||||||
|  | 	int i; | ||||||
|  | 	struct ceph_client *client = s->private; | ||||||
|  | 	struct rb_node *n; | ||||||
|  | 
 | ||||||
|  | 	if (client->osdc.osdmap == NULL) | ||||||
|  | 		return 0; | ||||||
|  | 	seq_printf(s, "epoch %d\n", client->osdc.osdmap->epoch); | ||||||
|  | 	seq_printf(s, "flags%s%s\n", | ||||||
|  | 		   (client->osdc.osdmap->flags & CEPH_OSDMAP_NEARFULL) ? | ||||||
|  | 		   " NEARFULL" : "", | ||||||
|  | 		   (client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ? | ||||||
|  | 		   " FULL" : ""); | ||||||
|  | 	for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) { | ||||||
|  | 		struct ceph_pg_pool_info *pool = | ||||||
|  | 			rb_entry(n, struct ceph_pg_pool_info, node); | ||||||
|  | 		seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n", | ||||||
|  | 			   pool->id, pool->v.pg_num, pool->pg_num_mask, | ||||||
|  | 			   pool->v.lpg_num, pool->lpg_num_mask); | ||||||
|  | 	} | ||||||
|  | 	for (i = 0; i < client->osdc.osdmap->max_osd; i++) { | ||||||
|  | 		struct ceph_entity_addr *addr = | ||||||
|  | 			&client->osdc.osdmap->osd_addr[i]; | ||||||
|  | 		int state = client->osdc.osdmap->osd_state[i]; | ||||||
|  | 		char sb[64]; | ||||||
|  | 
 | ||||||
|  | 		seq_printf(s, "\tosd%d\t%s\t%3d%%\t(%s)\n", | ||||||
|  | 			   i, ceph_pr_addr(&addr->in_addr), | ||||||
|  | 			   ((client->osdc.osdmap->osd_weight[i]*100) >> 16), | ||||||
|  | 			   ceph_osdmap_state_str(sb, sizeof(sb), state)); | ||||||
|  | 	} | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static int monc_show(struct seq_file *s, void *p) | ||||||
|  | { | ||||||
|  | 	struct ceph_client *client = s->private; | ||||||
|  | 	struct ceph_mon_generic_request *req; | ||||||
|  | 	struct ceph_mon_client *monc = &client->monc; | ||||||
|  | 	struct rb_node *rp; | ||||||
|  | 
 | ||||||
|  | 	mutex_lock(&monc->mutex); | ||||||
|  | 
 | ||||||
|  | 	if (monc->have_mdsmap) | ||||||
|  | 		seq_printf(s, "have mdsmap %u\n", (unsigned)monc->have_mdsmap); | ||||||
|  | 	if (monc->have_osdmap) | ||||||
|  | 		seq_printf(s, "have osdmap %u\n", (unsigned)monc->have_osdmap); | ||||||
|  | 	if (monc->want_next_osdmap) | ||||||
|  | 		seq_printf(s, "want next osdmap\n"); | ||||||
|  | 
 | ||||||
|  | 	for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) { | ||||||
|  | 		__u16 op; | ||||||
|  | 		req = rb_entry(rp, struct ceph_mon_generic_request, node); | ||||||
|  | 		op = le16_to_cpu(req->request->hdr.type); | ||||||
|  | 		if (op == CEPH_MSG_STATFS) | ||||||
|  | 			seq_printf(s, "%lld statfs\n", req->tid); | ||||||
|  | 		else | ||||||
|  | 			seq_printf(s, "%lld unknown\n", req->tid); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	mutex_unlock(&monc->mutex); | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static int osdc_show(struct seq_file *s, void *pp) | ||||||
|  | { | ||||||
|  | 	struct ceph_client *client = s->private; | ||||||
|  | 	struct ceph_osd_client *osdc = &client->osdc; | ||||||
|  | 	struct rb_node *p; | ||||||
|  | 
 | ||||||
|  | 	mutex_lock(&osdc->request_mutex); | ||||||
|  | 	for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { | ||||||
|  | 		struct ceph_osd_request *req; | ||||||
|  | 		struct ceph_osd_request_head *head; | ||||||
|  | 		struct ceph_osd_op *op; | ||||||
|  | 		int num_ops; | ||||||
|  | 		int opcode, olen; | ||||||
|  | 		int i; | ||||||
|  | 
 | ||||||
|  | 		req = rb_entry(p, struct ceph_osd_request, r_node); | ||||||
|  | 
 | ||||||
|  | 		seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid, | ||||||
|  | 			   req->r_osd ? req->r_osd->o_osd : -1, | ||||||
|  | 			   le32_to_cpu(req->r_pgid.pool), | ||||||
|  | 			   le16_to_cpu(req->r_pgid.ps)); | ||||||
|  | 
 | ||||||
|  | 		head = req->r_request->front.iov_base; | ||||||
|  | 		op = (void *)(head + 1); | ||||||
|  | 
 | ||||||
|  | 		num_ops = le16_to_cpu(head->num_ops); | ||||||
|  | 		olen = le32_to_cpu(head->object_len); | ||||||
|  | 		seq_printf(s, "%.*s", olen, | ||||||
|  | 			   (const char *)(head->ops + num_ops)); | ||||||
|  | 
 | ||||||
|  | 		if (req->r_reassert_version.epoch) | ||||||
|  | 			seq_printf(s, "\t%u'%llu", | ||||||
|  | 			   (unsigned)le32_to_cpu(req->r_reassert_version.epoch), | ||||||
|  | 			   le64_to_cpu(req->r_reassert_version.version)); | ||||||
|  | 		else | ||||||
|  | 			seq_printf(s, "\t"); | ||||||
|  | 
 | ||||||
|  | 		for (i = 0; i < num_ops; i++) { | ||||||
|  | 			opcode = le16_to_cpu(op->op); | ||||||
|  | 			seq_printf(s, "\t%s", ceph_osd_op_name(opcode)); | ||||||
|  | 			op++; | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		seq_printf(s, "\n"); | ||||||
|  | 	} | ||||||
|  | 	mutex_unlock(&osdc->request_mutex); | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | CEPH_DEFINE_SHOW_FUNC(monmap_show) | ||||||
|  | CEPH_DEFINE_SHOW_FUNC(osdmap_show) | ||||||
|  | CEPH_DEFINE_SHOW_FUNC(monc_show) | ||||||
|  | CEPH_DEFINE_SHOW_FUNC(osdc_show) | ||||||
|  | 
 | ||||||
|  | int ceph_debugfs_init(void) | ||||||
|  | { | ||||||
|  | 	ceph_debugfs_dir = debugfs_create_dir("ceph", NULL); | ||||||
|  | 	if (!ceph_debugfs_dir) | ||||||
|  | 		return -ENOMEM; | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void ceph_debugfs_cleanup(void) | ||||||
|  | { | ||||||
|  | 	debugfs_remove(ceph_debugfs_dir); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | int ceph_debugfs_client_init(struct ceph_client *client) | ||||||
|  | { | ||||||
|  | 	int ret = -ENOMEM; | ||||||
|  | 	char name[80]; | ||||||
|  | 
 | ||||||
|  | 	snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid, | ||||||
|  | 		 client->monc.auth->global_id); | ||||||
|  | 
 | ||||||
|  | 	client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); | ||||||
|  | 	if (!client->debugfs_dir) | ||||||
|  | 		goto out; | ||||||
|  | 
 | ||||||
|  | 	client->monc.debugfs_file = debugfs_create_file("monc", | ||||||
|  | 						      0600, | ||||||
|  | 						      client->debugfs_dir, | ||||||
|  | 						      client, | ||||||
|  | 						      &monc_show_fops); | ||||||
|  | 	if (!client->monc.debugfs_file) | ||||||
|  | 		goto out; | ||||||
|  | 
 | ||||||
|  | 	client->osdc.debugfs_file = debugfs_create_file("osdc", | ||||||
|  | 						      0600, | ||||||
|  | 						      client->debugfs_dir, | ||||||
|  | 						      client, | ||||||
|  | 						      &osdc_show_fops); | ||||||
|  | 	if (!client->osdc.debugfs_file) | ||||||
|  | 		goto out; | ||||||
|  | 
 | ||||||
|  | 	client->debugfs_monmap = debugfs_create_file("monmap", | ||||||
|  | 					0600, | ||||||
|  | 					client->debugfs_dir, | ||||||
|  | 					client, | ||||||
|  | 					&monmap_show_fops); | ||||||
|  | 	if (!client->debugfs_monmap) | ||||||
|  | 		goto out; | ||||||
|  | 
 | ||||||
|  | 	client->debugfs_osdmap = debugfs_create_file("osdmap", | ||||||
|  | 					0600, | ||||||
|  | 					client->debugfs_dir, | ||||||
|  | 					client, | ||||||
|  | 					&osdmap_show_fops); | ||||||
|  | 	if (!client->debugfs_osdmap) | ||||||
|  | 		goto out; | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | 
 | ||||||
|  | out: | ||||||
|  | 	ceph_debugfs_client_cleanup(client); | ||||||
|  | 	return ret; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void ceph_debugfs_client_cleanup(struct ceph_client *client) | ||||||
|  | { | ||||||
|  | 	debugfs_remove(client->debugfs_osdmap); | ||||||
|  | 	debugfs_remove(client->debugfs_monmap); | ||||||
|  | 	debugfs_remove(client->osdc.debugfs_file); | ||||||
|  | 	debugfs_remove(client->monc.debugfs_file); | ||||||
|  | 	debugfs_remove(client->debugfs_dir); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | #else  /* CONFIG_DEBUG_FS */ | ||||||
|  | 
 | ||||||
/*
 * Stubs used when CONFIG_DEBUG_FS is not set: the init routines report
 * success and the cleanup routines do nothing.
 */

/* Nothing to set up; always succeeds. */
int ceph_debugfs_init(void)
{
	return 0;
}

/* Nothing to tear down. */
void ceph_debugfs_cleanup(void)
{
}

/* No per-client entries to create; always succeeds. */
int ceph_debugfs_client_init(struct ceph_client *client)
{
	return 0;
}

/* No per-client entries to remove. */
void ceph_debugfs_client_cleanup(struct ceph_client *client)
{
}
|  | 
 | ||||||
|  | #endif  /* CONFIG_DEBUG_FS */ | ||||||
|  | 
 | ||||||
|  | EXPORT_SYMBOL(ceph_debugfs_init); | ||||||
|  | EXPORT_SYMBOL(ceph_debugfs_cleanup); | ||||||
| @ -1,4 +1,4 @@ | |||||||
| #include "ceph_debug.h" | #include <linux/ceph/ceph_debug.h> | ||||||
| 
 | 
 | ||||||
| #include <linux/crc32c.h> | #include <linux/crc32c.h> | ||||||
| #include <linux/ctype.h> | #include <linux/ctype.h> | ||||||
| @ -9,12 +9,14 @@ | |||||||
| #include <linux/slab.h> | #include <linux/slab.h> | ||||||
| #include <linux/socket.h> | #include <linux/socket.h> | ||||||
| #include <linux/string.h> | #include <linux/string.h> | ||||||
|  | #include <linux/bio.h> | ||||||
|  | #include <linux/blkdev.h> | ||||||
| #include <net/tcp.h> | #include <net/tcp.h> | ||||||
| 
 | 
 | ||||||
| #include "super.h" | #include <linux/ceph/libceph.h> | ||||||
| #include "messenger.h" | #include <linux/ceph/messenger.h> | ||||||
| #include "decode.h" | #include <linux/ceph/decode.h> | ||||||
| #include "pagelist.h" | #include <linux/ceph/pagelist.h> | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Ceph uses the messenger to exchange ceph_msg messages with other |  * Ceph uses the messenger to exchange ceph_msg messages with other | ||||||
| @ -48,7 +50,7 @@ static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN]; | |||||||
| static DEFINE_SPINLOCK(addr_str_lock); | static DEFINE_SPINLOCK(addr_str_lock); | ||||||
| static int last_addr_str; | static int last_addr_str; | ||||||
| 
 | 
 | ||||||
| const char *pr_addr(const struct sockaddr_storage *ss) | const char *ceph_pr_addr(const struct sockaddr_storage *ss) | ||||||
| { | { | ||||||
| 	int i; | 	int i; | ||||||
| 	char *s; | 	char *s; | ||||||
| @ -79,6 +81,7 @@ const char *pr_addr(const struct sockaddr_storage *ss) | |||||||
| 
 | 
 | ||||||
| 	return s; | 	return s; | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_pr_addr); | ||||||
| 
 | 
 | ||||||
| static void encode_my_addr(struct ceph_messenger *msgr) | static void encode_my_addr(struct ceph_messenger *msgr) | ||||||
| { | { | ||||||
| @ -91,7 +94,7 @@ static void encode_my_addr(struct ceph_messenger *msgr) | |||||||
|  */ |  */ | ||||||
| struct workqueue_struct *ceph_msgr_wq; | struct workqueue_struct *ceph_msgr_wq; | ||||||
| 
 | 
 | ||||||
| int __init ceph_msgr_init(void) | int ceph_msgr_init(void) | ||||||
| { | { | ||||||
| 	ceph_msgr_wq = create_workqueue("ceph-msgr"); | 	ceph_msgr_wq = create_workqueue("ceph-msgr"); | ||||||
| 	if (IS_ERR(ceph_msgr_wq)) { | 	if (IS_ERR(ceph_msgr_wq)) { | ||||||
| @ -102,16 +105,19 @@ int __init ceph_msgr_init(void) | |||||||
| 	} | 	} | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_msgr_init); | ||||||
| 
 | 
 | ||||||
| void ceph_msgr_exit(void) | void ceph_msgr_exit(void) | ||||||
| { | { | ||||||
| 	destroy_workqueue(ceph_msgr_wq); | 	destroy_workqueue(ceph_msgr_wq); | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_msgr_exit); | ||||||
| 
 | 
 | ||||||
| void ceph_msgr_flush(void) | void ceph_msgr_flush(void) | ||||||
| { | { | ||||||
| 	flush_workqueue(ceph_msgr_wq); | 	flush_workqueue(ceph_msgr_wq); | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_msgr_flush); | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
| @ -221,19 +227,19 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con) | |||||||
| 
 | 
 | ||||||
| 	set_sock_callbacks(sock, con); | 	set_sock_callbacks(sock, con); | ||||||
| 
 | 
 | ||||||
| 	dout("connect %s\n", pr_addr(&con->peer_addr.in_addr)); | 	dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr)); | ||||||
| 
 | 
 | ||||||
| 	ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr), | 	ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr), | ||||||
| 				 O_NONBLOCK); | 				 O_NONBLOCK); | ||||||
| 	if (ret == -EINPROGRESS) { | 	if (ret == -EINPROGRESS) { | ||||||
| 		dout("connect %s EINPROGRESS sk_state = %u\n", | 		dout("connect %s EINPROGRESS sk_state = %u\n", | ||||||
| 		     pr_addr(&con->peer_addr.in_addr), | 		     ceph_pr_addr(&con->peer_addr.in_addr), | ||||||
| 		     sock->sk->sk_state); | 		     sock->sk->sk_state); | ||||||
| 		ret = 0; | 		ret = 0; | ||||||
| 	} | 	} | ||||||
| 	if (ret < 0) { | 	if (ret < 0) { | ||||||
| 		pr_err("connect %s error %d\n", | 		pr_err("connect %s error %d\n", | ||||||
| 		       pr_addr(&con->peer_addr.in_addr), ret); | 		       ceph_pr_addr(&con->peer_addr.in_addr), ret); | ||||||
| 		sock_release(sock); | 		sock_release(sock); | ||||||
| 		con->sock = NULL; | 		con->sock = NULL; | ||||||
| 		con->error_msg = "connect error"; | 		con->error_msg = "connect error"; | ||||||
| @ -334,7 +340,8 @@ static void reset_connection(struct ceph_connection *con) | |||||||
|  */ |  */ | ||||||
| void ceph_con_close(struct ceph_connection *con) | void ceph_con_close(struct ceph_connection *con) | ||||||
| { | { | ||||||
| 	dout("con_close %p peer %s\n", con, pr_addr(&con->peer_addr.in_addr)); | 	dout("con_close %p peer %s\n", con, | ||||||
|  | 	     ceph_pr_addr(&con->peer_addr.in_addr)); | ||||||
| 	set_bit(CLOSED, &con->state);  /* in case there's queued work */ | 	set_bit(CLOSED, &con->state);  /* in case there's queued work */ | ||||||
| 	clear_bit(STANDBY, &con->state);  /* avoid connect_seq bump */ | 	clear_bit(STANDBY, &con->state);  /* avoid connect_seq bump */ | ||||||
| 	clear_bit(LOSSYTX, &con->state);  /* so we retry next connect */ | 	clear_bit(LOSSYTX, &con->state);  /* so we retry next connect */ | ||||||
| @ -347,19 +354,21 @@ void ceph_con_close(struct ceph_connection *con) | |||||||
| 	mutex_unlock(&con->mutex); | 	mutex_unlock(&con->mutex); | ||||||
| 	queue_con(con); | 	queue_con(con); | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_con_close); | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Reopen a closed connection, with a new peer address. |  * Reopen a closed connection, with a new peer address. | ||||||
|  */ |  */ | ||||||
| void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr) | void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr) | ||||||
| { | { | ||||||
| 	dout("con_open %p %s\n", con, pr_addr(&addr->in_addr)); | 	dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr)); | ||||||
| 	set_bit(OPENING, &con->state); | 	set_bit(OPENING, &con->state); | ||||||
| 	clear_bit(CLOSED, &con->state); | 	clear_bit(CLOSED, &con->state); | ||||||
| 	memcpy(&con->peer_addr, addr, sizeof(*addr)); | 	memcpy(&con->peer_addr, addr, sizeof(*addr)); | ||||||
| 	con->delay = 0;      /* reset backoff memory */ | 	con->delay = 0;      /* reset backoff memory */ | ||||||
| 	queue_con(con); | 	queue_con(con); | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_con_open); | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * return true if this connection ever successfully opened |  * return true if this connection ever successfully opened | ||||||
| @ -406,6 +415,7 @@ void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con) | |||||||
| 	INIT_LIST_HEAD(&con->out_sent); | 	INIT_LIST_HEAD(&con->out_sent); | ||||||
| 	INIT_DELAYED_WORK(&con->work, con_work); | 	INIT_DELAYED_WORK(&con->work, con_work); | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_con_init); | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
| @ -529,8 +539,11 @@ static void prepare_write_message(struct ceph_connection *con) | |||||||
| 	if (le32_to_cpu(m->hdr.data_len) > 0) { | 	if (le32_to_cpu(m->hdr.data_len) > 0) { | ||||||
| 		/* initialize page iterator */ | 		/* initialize page iterator */ | ||||||
| 		con->out_msg_pos.page = 0; | 		con->out_msg_pos.page = 0; | ||||||
| 		con->out_msg_pos.page_pos = | 		if (m->pages) | ||||||
| 			le16_to_cpu(m->hdr.data_off) & ~PAGE_MASK; | 			con->out_msg_pos.page_pos = | ||||||
|  | 				le16_to_cpu(m->hdr.data_off) & ~PAGE_MASK; | ||||||
|  | 		else | ||||||
|  | 			con->out_msg_pos.page_pos = 0; | ||||||
| 		con->out_msg_pos.data_pos = 0; | 		con->out_msg_pos.data_pos = 0; | ||||||
| 		con->out_msg_pos.did_page_crc = 0; | 		con->out_msg_pos.did_page_crc = 0; | ||||||
| 		con->out_more = 1;  /* data + footer will follow */ | 		con->out_more = 1;  /* data + footer will follow */ | ||||||
| @ -647,7 +660,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr, | |||||||
| 	dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, | 	dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, | ||||||
| 	     con->connect_seq, global_seq, proto); | 	     con->connect_seq, global_seq, proto); | ||||||
| 
 | 
 | ||||||
| 	con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED); | 	con->out_connect.features = cpu_to_le64(msgr->supported_features); | ||||||
| 	con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); | 	con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); | ||||||
| 	con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); | 	con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); | ||||||
| 	con->out_connect.global_seq = cpu_to_le32(global_seq); | 	con->out_connect.global_seq = cpu_to_le32(global_seq); | ||||||
| @ -712,6 +725,31 @@ out: | |||||||
| 	return ret;  /* done! */ | 	return ret;  /* done! */ | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | #ifdef CONFIG_BLOCK | ||||||
|  | static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg) | ||||||
|  | { | ||||||
|  | 	if (!bio) { | ||||||
|  | 		*iter = NULL; | ||||||
|  | 		*seg = 0; | ||||||
|  | 		return; | ||||||
|  | 	} | ||||||
|  | 	*iter = bio; | ||||||
|  | 	*seg = bio->bi_idx; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void iter_bio_next(struct bio **bio_iter, int *seg) | ||||||
|  | { | ||||||
|  | 	if (*bio_iter == NULL) | ||||||
|  | 		return; | ||||||
|  | 
 | ||||||
|  | 	BUG_ON(*seg >= (*bio_iter)->bi_vcnt); | ||||||
|  | 
 | ||||||
|  | 	(*seg)++; | ||||||
|  | 	if (*seg == (*bio_iter)->bi_vcnt) | ||||||
|  | 		init_bio_iter((*bio_iter)->bi_next, bio_iter, seg); | ||||||
|  | } | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Write as much message data payload as we can.  If we finish, queue |  * Write as much message data payload as we can.  If we finish, queue | ||||||
|  * up the footer. |  * up the footer. | ||||||
| @ -726,21 +764,46 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||||||
| 	size_t len; | 	size_t len; | ||||||
| 	int crc = con->msgr->nocrc; | 	int crc = con->msgr->nocrc; | ||||||
| 	int ret; | 	int ret; | ||||||
|  | 	int total_max_write; | ||||||
|  | 	int in_trail = 0; | ||||||
|  | 	size_t trail_len = (msg->trail ? msg->trail->length : 0); | ||||||
| 
 | 
 | ||||||
| 	dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n", | 	dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n", | ||||||
| 	     con, con->out_msg, con->out_msg_pos.page, con->out_msg->nr_pages, | 	     con, con->out_msg, con->out_msg_pos.page, con->out_msg->nr_pages, | ||||||
| 	     con->out_msg_pos.page_pos); | 	     con->out_msg_pos.page_pos); | ||||||
| 
 | 
 | ||||||
| 	while (con->out_msg_pos.page < con->out_msg->nr_pages) { | #ifdef CONFIG_BLOCK | ||||||
|  | 	if (msg->bio && !msg->bio_iter) | ||||||
|  | 		init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg); | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | 	while (data_len > con->out_msg_pos.data_pos) { | ||||||
| 		struct page *page = NULL; | 		struct page *page = NULL; | ||||||
| 		void *kaddr = NULL; | 		void *kaddr = NULL; | ||||||
|  | 		int max_write = PAGE_SIZE; | ||||||
|  | 		int page_shift = 0; | ||||||
|  | 
 | ||||||
|  | 		total_max_write = data_len - trail_len - | ||||||
|  | 			con->out_msg_pos.data_pos; | ||||||
| 
 | 
 | ||||||
| 		/*
 | 		/*
 | ||||||
| 		 * if we are calculating the data crc (the default), we need | 		 * if we are calculating the data crc (the default), we need | ||||||
| 		 * to map the page.  if our pages[] has been revoked, use the | 		 * to map the page.  if our pages[] has been revoked, use the | ||||||
| 		 * zero page. | 		 * zero page. | ||||||
| 		 */ | 		 */ | ||||||
| 		if (msg->pages) { | 
 | ||||||
|  | 		/* have we reached the trail part of the data? */ | ||||||
|  | 		if (con->out_msg_pos.data_pos >= data_len - trail_len) { | ||||||
|  | 			in_trail = 1; | ||||||
|  | 
 | ||||||
|  | 			total_max_write = data_len - con->out_msg_pos.data_pos; | ||||||
|  | 
 | ||||||
|  | 			page = list_first_entry(&msg->trail->head, | ||||||
|  | 						struct page, lru); | ||||||
|  | 			if (crc) | ||||||
|  | 				kaddr = kmap(page); | ||||||
|  | 			max_write = PAGE_SIZE; | ||||||
|  | 		} else if (msg->pages) { | ||||||
| 			page = msg->pages[con->out_msg_pos.page]; | 			page = msg->pages[con->out_msg_pos.page]; | ||||||
| 			if (crc) | 			if (crc) | ||||||
| 				kaddr = kmap(page); | 				kaddr = kmap(page); | ||||||
| @ -749,13 +812,25 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||||||
| 						struct page, lru); | 						struct page, lru); | ||||||
| 			if (crc) | 			if (crc) | ||||||
| 				kaddr = kmap(page); | 				kaddr = kmap(page); | ||||||
|  | #ifdef CONFIG_BLOCK | ||||||
|  | 		} else if (msg->bio) { | ||||||
|  | 			struct bio_vec *bv; | ||||||
|  | 
 | ||||||
|  | 			bv = bio_iovec_idx(msg->bio_iter, msg->bio_seg); | ||||||
|  | 			page = bv->bv_page; | ||||||
|  | 			page_shift = bv->bv_offset; | ||||||
|  | 			if (crc) | ||||||
|  | 				kaddr = kmap(page) + page_shift; | ||||||
|  | 			max_write = bv->bv_len; | ||||||
|  | #endif | ||||||
| 		} else { | 		} else { | ||||||
| 			page = con->msgr->zero_page; | 			page = con->msgr->zero_page; | ||||||
| 			if (crc) | 			if (crc) | ||||||
| 				kaddr = page_address(con->msgr->zero_page); | 				kaddr = page_address(con->msgr->zero_page); | ||||||
| 		} | 		} | ||||||
| 		len = min((int)(PAGE_SIZE - con->out_msg_pos.page_pos), | 		len = min_t(int, max_write - con->out_msg_pos.page_pos, | ||||||
| 			  (int)(data_len - con->out_msg_pos.data_pos)); | 			    total_max_write); | ||||||
|  | 
 | ||||||
| 		if (crc && !con->out_msg_pos.did_page_crc) { | 		if (crc && !con->out_msg_pos.did_page_crc) { | ||||||
| 			void *base = kaddr + con->out_msg_pos.page_pos; | 			void *base = kaddr + con->out_msg_pos.page_pos; | ||||||
| 			u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc); | 			u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc); | ||||||
| @ -765,13 +840,14 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||||||
| 				cpu_to_le32(crc32c(tmpcrc, base, len)); | 				cpu_to_le32(crc32c(tmpcrc, base, len)); | ||||||
| 			con->out_msg_pos.did_page_crc = 1; | 			con->out_msg_pos.did_page_crc = 1; | ||||||
| 		} | 		} | ||||||
| 
 |  | ||||||
| 		ret = kernel_sendpage(con->sock, page, | 		ret = kernel_sendpage(con->sock, page, | ||||||
| 				      con->out_msg_pos.page_pos, len, | 				      con->out_msg_pos.page_pos + page_shift, | ||||||
|  | 				      len, | ||||||
| 				      MSG_DONTWAIT | MSG_NOSIGNAL | | 				      MSG_DONTWAIT | MSG_NOSIGNAL | | ||||||
| 				      MSG_MORE); | 				      MSG_MORE); | ||||||
| 
 | 
 | ||||||
| 		if (crc && (msg->pages || msg->pagelist)) | 		if (crc && | ||||||
|  | 		    (msg->pages || msg->pagelist || msg->bio || in_trail)) | ||||||
| 			kunmap(page); | 			kunmap(page); | ||||||
| 
 | 
 | ||||||
| 		if (ret <= 0) | 		if (ret <= 0) | ||||||
| @ -783,9 +859,16 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||||||
| 			con->out_msg_pos.page_pos = 0; | 			con->out_msg_pos.page_pos = 0; | ||||||
| 			con->out_msg_pos.page++; | 			con->out_msg_pos.page++; | ||||||
| 			con->out_msg_pos.did_page_crc = 0; | 			con->out_msg_pos.did_page_crc = 0; | ||||||
| 			if (msg->pagelist) | 			if (in_trail) | ||||||
|  | 				list_move_tail(&page->lru, | ||||||
|  | 					       &msg->trail->head); | ||||||
|  | 			else if (msg->pagelist) | ||||||
| 				list_move_tail(&page->lru, | 				list_move_tail(&page->lru, | ||||||
| 					       &msg->pagelist->head); | 					       &msg->pagelist->head); | ||||||
|  | #ifdef CONFIG_BLOCK | ||||||
|  | 			else if (msg->bio) | ||||||
|  | 				iter_bio_next(&msg->bio_iter, &msg->bio_seg); | ||||||
|  | #endif | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| @ -938,7 +1021,7 @@ static int verify_hello(struct ceph_connection *con) | |||||||
| { | { | ||||||
| 	if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) { | 	if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) { | ||||||
| 		pr_err("connect to %s got bad banner\n", | 		pr_err("connect to %s got bad banner\n", | ||||||
| 		       pr_addr(&con->peer_addr.in_addr)); | 		       ceph_pr_addr(&con->peer_addr.in_addr)); | ||||||
| 		con->error_msg = "protocol error, bad banner"; | 		con->error_msg = "protocol error, bad banner"; | ||||||
| 		return -1; | 		return -1; | ||||||
| 	} | 	} | ||||||
| @ -1041,7 +1124,7 @@ int ceph_parse_ips(const char *c, const char *end, | |||||||
| 
 | 
 | ||||||
| 		addr_set_port(ss, port); | 		addr_set_port(ss, port); | ||||||
| 
 | 
 | ||||||
| 		dout("parse_ips got %s\n", pr_addr(ss)); | 		dout("parse_ips got %s\n", ceph_pr_addr(ss)); | ||||||
| 
 | 
 | ||||||
| 		if (p == end) | 		if (p == end) | ||||||
| 			break; | 			break; | ||||||
| @ -1061,6 +1144,7 @@ bad: | |||||||
| 	pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c); | 	pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c); | ||||||
| 	return -EINVAL; | 	return -EINVAL; | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_parse_ips); | ||||||
| 
 | 
 | ||||||
| static int process_banner(struct ceph_connection *con) | static int process_banner(struct ceph_connection *con) | ||||||
| { | { | ||||||
| @ -1082,9 +1166,9 @@ static int process_banner(struct ceph_connection *con) | |||||||
| 	    !(addr_is_blank(&con->actual_peer_addr.in_addr) && | 	    !(addr_is_blank(&con->actual_peer_addr.in_addr) && | ||||||
| 	      con->actual_peer_addr.nonce == con->peer_addr.nonce)) { | 	      con->actual_peer_addr.nonce == con->peer_addr.nonce)) { | ||||||
| 		pr_warning("wrong peer, want %s/%d, got %s/%d\n", | 		pr_warning("wrong peer, want %s/%d, got %s/%d\n", | ||||||
| 			   pr_addr(&con->peer_addr.in_addr), | 			   ceph_pr_addr(&con->peer_addr.in_addr), | ||||||
| 			   (int)le32_to_cpu(con->peer_addr.nonce), | 			   (int)le32_to_cpu(con->peer_addr.nonce), | ||||||
| 			   pr_addr(&con->actual_peer_addr.in_addr), | 			   ceph_pr_addr(&con->actual_peer_addr.in_addr), | ||||||
| 			   (int)le32_to_cpu(con->actual_peer_addr.nonce)); | 			   (int)le32_to_cpu(con->actual_peer_addr.nonce)); | ||||||
| 		con->error_msg = "wrong peer at address"; | 		con->error_msg = "wrong peer at address"; | ||||||
| 		return -1; | 		return -1; | ||||||
| @ -1102,7 +1186,7 @@ static int process_banner(struct ceph_connection *con) | |||||||
| 		addr_set_port(&con->msgr->inst.addr.in_addr, port); | 		addr_set_port(&con->msgr->inst.addr.in_addr, port); | ||||||
| 		encode_my_addr(con->msgr); | 		encode_my_addr(con->msgr); | ||||||
| 		dout("process_banner learned my addr is %s\n", | 		dout("process_banner learned my addr is %s\n", | ||||||
| 		     pr_addr(&con->msgr->inst.addr.in_addr)); | 		     ceph_pr_addr(&con->msgr->inst.addr.in_addr)); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	set_bit(NEGOTIATING, &con->state); | 	set_bit(NEGOTIATING, &con->state); | ||||||
| @ -1123,8 +1207,8 @@ static void fail_protocol(struct ceph_connection *con) | |||||||
| 
 | 
 | ||||||
| static int process_connect(struct ceph_connection *con) | static int process_connect(struct ceph_connection *con) | ||||||
| { | { | ||||||
| 	u64 sup_feat = CEPH_FEATURE_SUPPORTED; | 	u64 sup_feat = con->msgr->supported_features; | ||||||
| 	u64 req_feat = CEPH_FEATURE_REQUIRED; | 	u64 req_feat = con->msgr->required_features; | ||||||
| 	u64 server_feat = le64_to_cpu(con->in_reply.features); | 	u64 server_feat = le64_to_cpu(con->in_reply.features); | ||||||
| 
 | 
 | ||||||
| 	dout("process_connect on %p tag %d\n", con, (int)con->in_tag); | 	dout("process_connect on %p tag %d\n", con, (int)con->in_tag); | ||||||
| @ -1134,7 +1218,7 @@ static int process_connect(struct ceph_connection *con) | |||||||
| 		pr_err("%s%lld %s feature set mismatch," | 		pr_err("%s%lld %s feature set mismatch," | ||||||
| 		       " my %llx < server's %llx, missing %llx\n", | 		       " my %llx < server's %llx, missing %llx\n", | ||||||
| 		       ENTITY_NAME(con->peer_name), | 		       ENTITY_NAME(con->peer_name), | ||||||
| 		       pr_addr(&con->peer_addr.in_addr), | 		       ceph_pr_addr(&con->peer_addr.in_addr), | ||||||
| 		       sup_feat, server_feat, server_feat & ~sup_feat); | 		       sup_feat, server_feat, server_feat & ~sup_feat); | ||||||
| 		con->error_msg = "missing required protocol features"; | 		con->error_msg = "missing required protocol features"; | ||||||
| 		fail_protocol(con); | 		fail_protocol(con); | ||||||
| @ -1144,7 +1228,7 @@ static int process_connect(struct ceph_connection *con) | |||||||
| 		pr_err("%s%lld %s protocol version mismatch," | 		pr_err("%s%lld %s protocol version mismatch," | ||||||
| 		       " my %d != server's %d\n", | 		       " my %d != server's %d\n", | ||||||
| 		       ENTITY_NAME(con->peer_name), | 		       ENTITY_NAME(con->peer_name), | ||||||
| 		       pr_addr(&con->peer_addr.in_addr), | 		       ceph_pr_addr(&con->peer_addr.in_addr), | ||||||
| 		       le32_to_cpu(con->out_connect.protocol_version), | 		       le32_to_cpu(con->out_connect.protocol_version), | ||||||
| 		       le32_to_cpu(con->in_reply.protocol_version)); | 		       le32_to_cpu(con->in_reply.protocol_version)); | ||||||
| 		con->error_msg = "protocol version mismatch"; | 		con->error_msg = "protocol version mismatch"; | ||||||
| @ -1178,7 +1262,7 @@ static int process_connect(struct ceph_connection *con) | |||||||
| 		     le32_to_cpu(con->in_connect.connect_seq)); | 		     le32_to_cpu(con->in_connect.connect_seq)); | ||||||
| 		pr_err("%s%lld %s connection reset\n", | 		pr_err("%s%lld %s connection reset\n", | ||||||
| 		       ENTITY_NAME(con->peer_name), | 		       ENTITY_NAME(con->peer_name), | ||||||
| 		       pr_addr(&con->peer_addr.in_addr)); | 		       ceph_pr_addr(&con->peer_addr.in_addr)); | ||||||
| 		reset_connection(con); | 		reset_connection(con); | ||||||
| 		prepare_write_connect(con->msgr, con, 0); | 		prepare_write_connect(con->msgr, con, 0); | ||||||
| 		prepare_read_connect(con); | 		prepare_read_connect(con); | ||||||
| @ -1223,7 +1307,7 @@ static int process_connect(struct ceph_connection *con) | |||||||
| 			pr_err("%s%lld %s protocol feature mismatch," | 			pr_err("%s%lld %s protocol feature mismatch," | ||||||
| 			       " my required %llx > server's %llx, need %llx\n", | 			       " my required %llx > server's %llx, need %llx\n", | ||||||
| 			       ENTITY_NAME(con->peer_name), | 			       ENTITY_NAME(con->peer_name), | ||||||
| 			       pr_addr(&con->peer_addr.in_addr), | 			       ceph_pr_addr(&con->peer_addr.in_addr), | ||||||
| 			       req_feat, server_feat, req_feat & ~server_feat); | 			       req_feat, server_feat, req_feat & ~server_feat); | ||||||
| 			con->error_msg = "missing required protocol features"; | 			con->error_msg = "missing required protocol features"; | ||||||
| 			fail_protocol(con); | 			fail_protocol(con); | ||||||
| @ -1305,8 +1389,7 @@ static int read_partial_message_section(struct ceph_connection *con, | |||||||
| 					struct kvec *section, | 					struct kvec *section, | ||||||
| 					unsigned int sec_len, u32 *crc) | 					unsigned int sec_len, u32 *crc) | ||||||
| { | { | ||||||
| 	int left; | 	int ret, left; | ||||||
| 	int ret; |  | ||||||
| 
 | 
 | ||||||
| 	BUG_ON(!section); | 	BUG_ON(!section); | ||||||
| 
 | 
 | ||||||
| @ -1329,13 +1412,83 @@ static int read_partial_message_section(struct ceph_connection *con, | |||||||
| static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, | static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, | ||||||
| 				struct ceph_msg_header *hdr, | 				struct ceph_msg_header *hdr, | ||||||
| 				int *skip); | 				int *skip); | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | static int read_partial_message_pages(struct ceph_connection *con, | ||||||
|  | 				      struct page **pages, | ||||||
|  | 				      unsigned data_len, int datacrc) | ||||||
|  | { | ||||||
|  | 	void *p; | ||||||
|  | 	int ret; | ||||||
|  | 	int left; | ||||||
|  | 
 | ||||||
|  | 	left = min((int)(data_len - con->in_msg_pos.data_pos), | ||||||
|  | 		   (int)(PAGE_SIZE - con->in_msg_pos.page_pos)); | ||||||
|  | 	/* (page) data */ | ||||||
|  | 	BUG_ON(pages == NULL); | ||||||
|  | 	p = kmap(pages[con->in_msg_pos.page]); | ||||||
|  | 	ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, | ||||||
|  | 			       left); | ||||||
|  | 	if (ret > 0 && datacrc) | ||||||
|  | 		con->in_data_crc = | ||||||
|  | 			crc32c(con->in_data_crc, | ||||||
|  | 				  p + con->in_msg_pos.page_pos, ret); | ||||||
|  | 	kunmap(pages[con->in_msg_pos.page]); | ||||||
|  | 	if (ret <= 0) | ||||||
|  | 		return ret; | ||||||
|  | 	con->in_msg_pos.data_pos += ret; | ||||||
|  | 	con->in_msg_pos.page_pos += ret; | ||||||
|  | 	if (con->in_msg_pos.page_pos == PAGE_SIZE) { | ||||||
|  | 		con->in_msg_pos.page_pos = 0; | ||||||
|  | 		con->in_msg_pos.page++; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return ret; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | #ifdef CONFIG_BLOCK | ||||||
|  | static int read_partial_message_bio(struct ceph_connection *con, | ||||||
|  | 				    struct bio **bio_iter, int *bio_seg, | ||||||
|  | 				    unsigned data_len, int datacrc) | ||||||
|  | { | ||||||
|  | 	struct bio_vec *bv = bio_iovec_idx(*bio_iter, *bio_seg); | ||||||
|  | 	void *p; | ||||||
|  | 	int ret, left; | ||||||
|  | 
 | ||||||
|  | 	if (IS_ERR(bv)) | ||||||
|  | 		return PTR_ERR(bv); | ||||||
|  | 
 | ||||||
|  | 	left = min((int)(data_len - con->in_msg_pos.data_pos), | ||||||
|  | 		   (int)(bv->bv_len - con->in_msg_pos.page_pos)); | ||||||
|  | 
 | ||||||
|  | 	p = kmap(bv->bv_page) + bv->bv_offset; | ||||||
|  | 
 | ||||||
|  | 	ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, | ||||||
|  | 			       left); | ||||||
|  | 	if (ret > 0 && datacrc) | ||||||
|  | 		con->in_data_crc = | ||||||
|  | 			crc32c(con->in_data_crc, | ||||||
|  | 				  p + con->in_msg_pos.page_pos, ret); | ||||||
|  | 	kunmap(bv->bv_page); | ||||||
|  | 	if (ret <= 0) | ||||||
|  | 		return ret; | ||||||
|  | 	con->in_msg_pos.data_pos += ret; | ||||||
|  | 	con->in_msg_pos.page_pos += ret; | ||||||
|  | 	if (con->in_msg_pos.page_pos == bv->bv_len) { | ||||||
|  | 		con->in_msg_pos.page_pos = 0; | ||||||
|  | 		iter_bio_next(bio_iter, bio_seg); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return ret; | ||||||
|  | } | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * read (part of) a message. |  * read (part of) a message. | ||||||
|  */ |  */ | ||||||
| static int read_partial_message(struct ceph_connection *con) | static int read_partial_message(struct ceph_connection *con) | ||||||
| { | { | ||||||
| 	struct ceph_msg *m = con->in_msg; | 	struct ceph_msg *m = con->in_msg; | ||||||
| 	void *p; |  | ||||||
| 	int ret; | 	int ret; | ||||||
| 	int to, left; | 	int to, left; | ||||||
| 	unsigned front_len, middle_len, data_len, data_off; | 	unsigned front_len, middle_len, data_len, data_off; | ||||||
| @ -1381,7 +1534,7 @@ static int read_partial_message(struct ceph_connection *con) | |||||||
| 	if ((s64)seq - (s64)con->in_seq < 1) { | 	if ((s64)seq - (s64)con->in_seq < 1) { | ||||||
| 		pr_info("skipping %s%lld %s seq %lld, expected %lld\n", | 		pr_info("skipping %s%lld %s seq %lld, expected %lld\n", | ||||||
| 			ENTITY_NAME(con->peer_name), | 			ENTITY_NAME(con->peer_name), | ||||||
| 			pr_addr(&con->peer_addr.in_addr), | 			ceph_pr_addr(&con->peer_addr.in_addr), | ||||||
| 			seq, con->in_seq + 1); | 			seq, con->in_seq + 1); | ||||||
| 		con->in_base_pos = -front_len - middle_len - data_len - | 		con->in_base_pos = -front_len - middle_len - data_len - | ||||||
| 			sizeof(m->footer); | 			sizeof(m->footer); | ||||||
| @ -1422,7 +1575,10 @@ static int read_partial_message(struct ceph_connection *con) | |||||||
| 			m->middle->vec.iov_len = 0; | 			m->middle->vec.iov_len = 0; | ||||||
| 
 | 
 | ||||||
| 		con->in_msg_pos.page = 0; | 		con->in_msg_pos.page = 0; | ||||||
| 		con->in_msg_pos.page_pos = data_off & ~PAGE_MASK; | 		if (m->pages) | ||||||
|  | 			con->in_msg_pos.page_pos = data_off & ~PAGE_MASK; | ||||||
|  | 		else | ||||||
|  | 			con->in_msg_pos.page_pos = 0; | ||||||
| 		con->in_msg_pos.data_pos = 0; | 		con->in_msg_pos.data_pos = 0; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| @ -1440,27 +1596,29 @@ static int read_partial_message(struct ceph_connection *con) | |||||||
| 		if (ret <= 0) | 		if (ret <= 0) | ||||||
| 			return ret; | 			return ret; | ||||||
| 	} | 	} | ||||||
|  | #ifdef CONFIG_BLOCK | ||||||
|  | 	if (m->bio && !m->bio_iter) | ||||||
|  | 		init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg); | ||||||
|  | #endif | ||||||
| 
 | 
 | ||||||
| 	/* (page) data */ | 	/* (page) data */ | ||||||
| 	while (con->in_msg_pos.data_pos < data_len) { | 	while (con->in_msg_pos.data_pos < data_len) { | ||||||
| 		left = min((int)(data_len - con->in_msg_pos.data_pos), | 		if (m->pages) { | ||||||
| 			   (int)(PAGE_SIZE - con->in_msg_pos.page_pos)); | 			ret = read_partial_message_pages(con, m->pages, | ||||||
| 		BUG_ON(m->pages == NULL); | 						 data_len, datacrc); | ||||||
| 		p = kmap(m->pages[con->in_msg_pos.page]); | 			if (ret <= 0) | ||||||
| 		ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, | 				return ret; | ||||||
| 				       left); | #ifdef CONFIG_BLOCK | ||||||
| 		if (ret > 0 && datacrc) | 		} else if (m->bio) { | ||||||
| 			con->in_data_crc = | 
 | ||||||
| 				crc32c(con->in_data_crc, | 			ret = read_partial_message_bio(con, | ||||||
| 					  p + con->in_msg_pos.page_pos, ret); | 						 &m->bio_iter, &m->bio_seg, | ||||||
| 		kunmap(m->pages[con->in_msg_pos.page]); | 						 data_len, datacrc); | ||||||
| 		if (ret <= 0) | 			if (ret <= 0) | ||||||
| 			return ret; | 				return ret; | ||||||
| 		con->in_msg_pos.data_pos += ret; | #endif | ||||||
| 		con->in_msg_pos.page_pos += ret; | 		} else { | ||||||
| 		if (con->in_msg_pos.page_pos == PAGE_SIZE) { | 			BUG_ON(1); | ||||||
| 			con->in_msg_pos.page_pos = 0; |  | ||||||
| 			con->in_msg_pos.page++; |  | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| @ -1874,9 +2032,9 @@ out: | |||||||
| static void ceph_fault(struct ceph_connection *con) | static void ceph_fault(struct ceph_connection *con) | ||||||
| { | { | ||||||
| 	pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), | 	pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), | ||||||
| 	       pr_addr(&con->peer_addr.in_addr), con->error_msg); | 	       ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); | ||||||
| 	dout("fault %p state %lu to peer %s\n", | 	dout("fault %p state %lu to peer %s\n", | ||||||
| 	     con, con->state, pr_addr(&con->peer_addr.in_addr)); | 	     con, con->state, ceph_pr_addr(&con->peer_addr.in_addr)); | ||||||
| 
 | 
 | ||||||
| 	if (test_bit(LOSSYTX, &con->state)) { | 	if (test_bit(LOSSYTX, &con->state)) { | ||||||
| 		dout("fault on LOSSYTX channel\n"); | 		dout("fault on LOSSYTX channel\n"); | ||||||
| @ -1936,7 +2094,9 @@ out: | |||||||
| /*
 | /*
 | ||||||
|  * create a new messenger instance |  * create a new messenger instance | ||||||
|  */ |  */ | ||||||
| struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr) | struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr, | ||||||
|  | 					     u32 supported_features, | ||||||
|  | 					     u32 required_features) | ||||||
| { | { | ||||||
| 	struct ceph_messenger *msgr; | 	struct ceph_messenger *msgr; | ||||||
| 
 | 
 | ||||||
| @ -1944,6 +2104,9 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr) | |||||||
| 	if (msgr == NULL) | 	if (msgr == NULL) | ||||||
| 		return ERR_PTR(-ENOMEM); | 		return ERR_PTR(-ENOMEM); | ||||||
| 
 | 
 | ||||||
|  | 	msgr->supported_features = supported_features; | ||||||
|  | 	msgr->required_features = required_features; | ||||||
|  | 
 | ||||||
| 	spin_lock_init(&msgr->global_seq_lock); | 	spin_lock_init(&msgr->global_seq_lock); | ||||||
| 
 | 
 | ||||||
| 	/* the zero page is needed if a request is "canceled" while the message
 | 	/* the zero page is needed if a request is "canceled" while the message
 | ||||||
| @ -1966,6 +2129,7 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr) | |||||||
| 	dout("messenger_create %p\n", msgr); | 	dout("messenger_create %p\n", msgr); | ||||||
| 	return msgr; | 	return msgr; | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_messenger_create); | ||||||
| 
 | 
 | ||||||
| void ceph_messenger_destroy(struct ceph_messenger *msgr) | void ceph_messenger_destroy(struct ceph_messenger *msgr) | ||||||
| { | { | ||||||
| @ -1975,6 +2139,7 @@ void ceph_messenger_destroy(struct ceph_messenger *msgr) | |||||||
| 	kfree(msgr); | 	kfree(msgr); | ||||||
| 	dout("destroyed messenger %p\n", msgr); | 	dout("destroyed messenger %p\n", msgr); | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_messenger_destroy); | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Queue up an outgoing message on the given connection. |  * Queue up an outgoing message on the given connection. | ||||||
| @ -2011,6 +2176,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) | |||||||
| 	if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) | 	if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) | ||||||
| 		queue_con(con); | 		queue_con(con); | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_con_send); | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Revoke a message that was previously queued for send |  * Revoke a message that was previously queued for send | ||||||
| @ -2076,6 +2242,7 @@ void ceph_con_keepalive(struct ceph_connection *con) | |||||||
| 	    test_and_set_bit(WRITE_PENDING, &con->state) == 0) | 	    test_and_set_bit(WRITE_PENDING, &con->state) == 0) | ||||||
| 		queue_con(con); | 		queue_con(con); | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_con_keepalive); | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
| @ -2136,6 +2303,10 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) | |||||||
| 	m->nr_pages = 0; | 	m->nr_pages = 0; | ||||||
| 	m->pages = NULL; | 	m->pages = NULL; | ||||||
| 	m->pagelist = NULL; | 	m->pagelist = NULL; | ||||||
|  | 	m->bio = NULL; | ||||||
|  | 	m->bio_iter = NULL; | ||||||
|  | 	m->bio_seg = 0; | ||||||
|  | 	m->trail = NULL; | ||||||
| 
 | 
 | ||||||
| 	dout("ceph_msg_new %p front %d\n", m, front_len); | 	dout("ceph_msg_new %p front %d\n", m, front_len); | ||||||
| 	return m; | 	return m; | ||||||
| @ -2146,6 +2317,7 @@ out: | |||||||
| 	pr_err("msg_new can't create type %d front %d\n", type, front_len); | 	pr_err("msg_new can't create type %d front %d\n", type, front_len); | ||||||
| 	return NULL; | 	return NULL; | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_msg_new); | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Allocate "middle" portion of a message, if it is needed and wasn't |  * Allocate "middle" portion of a message, if it is needed and wasn't | ||||||
| @ -2250,11 +2422,14 @@ void ceph_msg_last_put(struct kref *kref) | |||||||
| 		m->pagelist = NULL; | 		m->pagelist = NULL; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | 	m->trail = NULL; | ||||||
|  | 
 | ||||||
| 	if (m->pool) | 	if (m->pool) | ||||||
| 		ceph_msgpool_put(m->pool, m); | 		ceph_msgpool_put(m->pool, m); | ||||||
| 	else | 	else | ||||||
| 		ceph_msg_kfree(m); | 		ceph_msg_kfree(m); | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_msg_last_put); | ||||||
| 
 | 
 | ||||||
| void ceph_msg_dump(struct ceph_msg *msg) | void ceph_msg_dump(struct ceph_msg *msg) | ||||||
| { | { | ||||||
| @ -2275,3 +2450,4 @@ void ceph_msg_dump(struct ceph_msg *msg) | |||||||
| 		       DUMP_PREFIX_OFFSET, 16, 1, | 		       DUMP_PREFIX_OFFSET, 16, 1, | ||||||
| 		       &msg->footer, sizeof(msg->footer), true); | 		       &msg->footer, sizeof(msg->footer), true); | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_msg_dump); | ||||||
| @ -1,14 +1,16 @@ | |||||||
| #include "ceph_debug.h" | #include <linux/ceph/ceph_debug.h> | ||||||
| 
 | 
 | ||||||
|  | #include <linux/module.h> | ||||||
| #include <linux/types.h> | #include <linux/types.h> | ||||||
| #include <linux/slab.h> | #include <linux/slab.h> | ||||||
| #include <linux/random.h> | #include <linux/random.h> | ||||||
| #include <linux/sched.h> | #include <linux/sched.h> | ||||||
| 
 | 
 | ||||||
| #include "mon_client.h" | #include <linux/ceph/mon_client.h> | ||||||
| #include "super.h" | #include <linux/ceph/libceph.h> | ||||||
| #include "auth.h" | #include <linux/ceph/decode.h> | ||||||
| #include "decode.h" | 
 | ||||||
|  | #include <linux/ceph/auth.h> | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Interact with Ceph monitor cluster.  Handle requests for new map |  * Interact with Ceph monitor cluster.  Handle requests for new map | ||||||
| @ -74,7 +76,7 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end) | |||||||
| 	     m->num_mon); | 	     m->num_mon); | ||||||
| 	for (i = 0; i < m->num_mon; i++) | 	for (i = 0; i < m->num_mon; i++) | ||||||
| 		dout("monmap_decode  mon%d is %s\n", i, | 		dout("monmap_decode  mon%d is %s\n", i, | ||||||
| 		     pr_addr(&m->mon_inst[i].addr.in_addr)); | 		     ceph_pr_addr(&m->mon_inst[i].addr.in_addr)); | ||||||
| 	return m; | 	return m; | ||||||
| 
 | 
 | ||||||
| bad: | bad: | ||||||
| @ -191,30 +193,33 @@ static void __send_subscribe(struct ceph_mon_client *monc) | |||||||
| 		struct ceph_msg *msg = monc->m_subscribe; | 		struct ceph_msg *msg = monc->m_subscribe; | ||||||
| 		struct ceph_mon_subscribe_item *i; | 		struct ceph_mon_subscribe_item *i; | ||||||
| 		void *p, *end; | 		void *p, *end; | ||||||
|  | 		int num; | ||||||
| 
 | 
 | ||||||
| 		p = msg->front.iov_base; | 		p = msg->front.iov_base; | ||||||
| 		end = p + msg->front_max; | 		end = p + msg->front_max; | ||||||
| 
 | 
 | ||||||
| 		dout("__send_subscribe to 'mdsmap' %u+\n", | 		num = 1 + !!monc->want_next_osdmap + !!monc->want_mdsmap; | ||||||
| 		     (unsigned)monc->have_mdsmap); | 		ceph_encode_32(&p, num); | ||||||
|  | 
 | ||||||
| 		if (monc->want_next_osdmap) { | 		if (monc->want_next_osdmap) { | ||||||
| 			dout("__send_subscribe to 'osdmap' %u\n", | 			dout("__send_subscribe to 'osdmap' %u\n", | ||||||
| 			     (unsigned)monc->have_osdmap); | 			     (unsigned)monc->have_osdmap); | ||||||
| 			ceph_encode_32(&p, 3); |  | ||||||
| 			ceph_encode_string(&p, end, "osdmap", 6); | 			ceph_encode_string(&p, end, "osdmap", 6); | ||||||
| 			i = p; | 			i = p; | ||||||
| 			i->have = cpu_to_le64(monc->have_osdmap); | 			i->have = cpu_to_le64(monc->have_osdmap); | ||||||
| 			i->onetime = 1; | 			i->onetime = 1; | ||||||
| 			p += sizeof(*i); | 			p += sizeof(*i); | ||||||
| 			monc->want_next_osdmap = 2;  /* requested */ | 			monc->want_next_osdmap = 2;  /* requested */ | ||||||
| 		} else { |  | ||||||
| 			ceph_encode_32(&p, 2); |  | ||||||
| 		} | 		} | ||||||
| 		ceph_encode_string(&p, end, "mdsmap", 6); | 		if (monc->want_mdsmap) { | ||||||
| 		i = p; | 			dout("__send_subscribe to 'mdsmap' %u+\n", | ||||||
| 		i->have = cpu_to_le64(monc->have_mdsmap); | 			     (unsigned)monc->have_mdsmap); | ||||||
| 		i->onetime = 0; | 			ceph_encode_string(&p, end, "mdsmap", 6); | ||||||
| 		p += sizeof(*i); | 			i = p; | ||||||
|  | 			i->have = cpu_to_le64(monc->have_mdsmap); | ||||||
|  | 			i->onetime = 0; | ||||||
|  | 			p += sizeof(*i); | ||||||
|  | 		} | ||||||
| 		ceph_encode_string(&p, end, "monmap", 6); | 		ceph_encode_string(&p, end, "monmap", 6); | ||||||
| 		i = p; | 		i = p; | ||||||
| 		i->have = 0; | 		i->have = 0; | ||||||
| @ -243,7 +248,8 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc, | |||||||
| 	mutex_lock(&monc->mutex); | 	mutex_lock(&monc->mutex); | ||||||
| 	if (monc->hunting) { | 	if (monc->hunting) { | ||||||
| 		pr_info("mon%d %s session established\n", | 		pr_info("mon%d %s session established\n", | ||||||
| 			monc->cur_mon, pr_addr(&monc->con->peer_addr.in_addr)); | 			monc->cur_mon, | ||||||
|  | 			ceph_pr_addr(&monc->con->peer_addr.in_addr)); | ||||||
| 		monc->hunting = false; | 		monc->hunting = false; | ||||||
| 	} | 	} | ||||||
| 	dout("handle_subscribe_ack after %d seconds\n", seconds); | 	dout("handle_subscribe_ack after %d seconds\n", seconds); | ||||||
| @ -266,6 +272,7 @@ int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 got) | |||||||
| 	mutex_unlock(&monc->mutex); | 	mutex_unlock(&monc->mutex); | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_monc_got_mdsmap); | ||||||
| 
 | 
 | ||||||
| int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 got) | int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 got) | ||||||
| { | { | ||||||
| @ -310,6 +317,7 @@ int ceph_monc_open_session(struct ceph_mon_client *monc) | |||||||
| 	mutex_unlock(&monc->mutex); | 	mutex_unlock(&monc->mutex); | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_monc_open_session); | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * The monitor responds with mount ack indicate mount success.  The |  * The monitor responds with mount ack indicate mount success.  The | ||||||
| @ -540,6 +548,7 @@ out: | |||||||
| 	kref_put(&req->kref, release_generic_request); | 	kref_put(&req->kref, release_generic_request); | ||||||
| 	return err; | 	return err; | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_monc_do_statfs); | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * pool ops |  * pool ops | ||||||
| @ -651,6 +660,7 @@ int ceph_monc_create_snapid(struct ceph_mon_client *monc, | |||||||
| 				   pool, 0, (char *)snapid, sizeof(*snapid)); | 				   pool, 0, (char *)snapid, sizeof(*snapid)); | ||||||
| 
 | 
 | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_monc_create_snapid); | ||||||
| 
 | 
 | ||||||
| int ceph_monc_delete_snapid(struct ceph_mon_client *monc, | int ceph_monc_delete_snapid(struct ceph_mon_client *monc, | ||||||
| 			    u32 pool, u64 snapid) | 			    u32 pool, u64 snapid) | ||||||
| @ -708,9 +718,9 @@ static void delayed_work(struct work_struct *work) | |||||||
|  */ |  */ | ||||||
| static int build_initial_monmap(struct ceph_mon_client *monc) | static int build_initial_monmap(struct ceph_mon_client *monc) | ||||||
| { | { | ||||||
| 	struct ceph_mount_args *args = monc->client->mount_args; | 	struct ceph_options *opt = monc->client->options; | ||||||
| 	struct ceph_entity_addr *mon_addr = args->mon_addr; | 	struct ceph_entity_addr *mon_addr = opt->mon_addr; | ||||||
| 	int num_mon = args->num_mon; | 	int num_mon = opt->num_mon; | ||||||
| 	int i; | 	int i; | ||||||
| 
 | 
 | ||||||
| 	/* build initial monmap */ | 	/* build initial monmap */ | ||||||
| @ -728,11 +738,6 @@ static int build_initial_monmap(struct ceph_mon_client *monc) | |||||||
| 	} | 	} | ||||||
| 	monc->monmap->num_mon = num_mon; | 	monc->monmap->num_mon = num_mon; | ||||||
| 	monc->have_fsid = false; | 	monc->have_fsid = false; | ||||||
| 
 |  | ||||||
| 	/* release addr memory */ |  | ||||||
| 	kfree(args->mon_addr); |  | ||||||
| 	args->mon_addr = NULL; |  | ||||||
| 	args->num_mon = 0; |  | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -753,8 +758,8 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) | |||||||
| 	monc->con = NULL; | 	monc->con = NULL; | ||||||
| 
 | 
 | ||||||
| 	/* authentication */ | 	/* authentication */ | ||||||
| 	monc->auth = ceph_auth_init(cl->mount_args->name, | 	monc->auth = ceph_auth_init(cl->options->name, | ||||||
| 				    cl->mount_args->secret); | 				    cl->options->secret); | ||||||
| 	if (IS_ERR(monc->auth)) | 	if (IS_ERR(monc->auth)) | ||||||
| 		return PTR_ERR(monc->auth); | 		return PTR_ERR(monc->auth); | ||||||
| 	monc->auth->want_keys = | 	monc->auth->want_keys = | ||||||
| @ -808,6 +813,7 @@ out_monmap: | |||||||
| out: | out: | ||||||
| 	return err; | 	return err; | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_monc_init); | ||||||
| 
 | 
 | ||||||
| void ceph_monc_stop(struct ceph_mon_client *monc) | void ceph_monc_stop(struct ceph_mon_client *monc) | ||||||
| { | { | ||||||
| @ -832,6 +838,7 @@ void ceph_monc_stop(struct ceph_mon_client *monc) | |||||||
| 
 | 
 | ||||||
| 	kfree(monc->monmap); | 	kfree(monc->monmap); | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_monc_stop); | ||||||
| 
 | 
 | ||||||
| static void handle_auth_reply(struct ceph_mon_client *monc, | static void handle_auth_reply(struct ceph_mon_client *monc, | ||||||
| 			      struct ceph_msg *msg) | 			      struct ceph_msg *msg) | ||||||
| @ -889,6 +896,7 @@ int ceph_monc_validate_auth(struct ceph_mon_client *monc) | |||||||
| 	mutex_unlock(&monc->mutex); | 	mutex_unlock(&monc->mutex); | ||||||
| 	return ret; | 	return ret; | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_monc_validate_auth); | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * handle incoming message |  * handle incoming message | ||||||
| @ -922,15 +930,16 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) | |||||||
| 		ceph_monc_handle_map(monc, msg); | 		ceph_monc_handle_map(monc, msg); | ||||||
| 		break; | 		break; | ||||||
| 
 | 
 | ||||||
| 	case CEPH_MSG_MDS_MAP: |  | ||||||
| 		ceph_mdsc_handle_map(&monc->client->mdsc, msg); |  | ||||||
| 		break; |  | ||||||
| 
 |  | ||||||
| 	case CEPH_MSG_OSD_MAP: | 	case CEPH_MSG_OSD_MAP: | ||||||
| 		ceph_osdc_handle_map(&monc->client->osdc, msg); | 		ceph_osdc_handle_map(&monc->client->osdc, msg); | ||||||
| 		break; | 		break; | ||||||
| 
 | 
 | ||||||
| 	default: | 	default: | ||||||
|  | 		/* can the chained handler handle it? */ | ||||||
|  | 		if (monc->client->extra_mon_dispatch && | ||||||
|  | 		    monc->client->extra_mon_dispatch(monc->client, msg) == 0) | ||||||
|  | 			break; | ||||||
|  | 			 | ||||||
| 		pr_err("received unknown message type %d %s\n", type, | 		pr_err("received unknown message type %d %s\n", type, | ||||||
| 		       ceph_msg_type_name(type)); | 		       ceph_msg_type_name(type)); | ||||||
| 	} | 	} | ||||||
| @ -994,7 +1003,7 @@ static void mon_fault(struct ceph_connection *con) | |||||||
| 	if (monc->con && !monc->hunting) | 	if (monc->con && !monc->hunting) | ||||||
| 		pr_info("mon%d %s session lost, " | 		pr_info("mon%d %s session lost, " | ||||||
| 			"hunting for new mon\n", monc->cur_mon, | 			"hunting for new mon\n", monc->cur_mon, | ||||||
| 			pr_addr(&monc->con->peer_addr.in_addr)); | 			ceph_pr_addr(&monc->con->peer_addr.in_addr)); | ||||||
| 
 | 
 | ||||||
| 	__close_session(monc); | 	__close_session(monc); | ||||||
| 	if (!monc->hunting) { | 	if (!monc->hunting) { | ||||||
| @ -1,11 +1,11 @@ | |||||||
| #include "ceph_debug.h" | #include <linux/ceph/ceph_debug.h> | ||||||
| 
 | 
 | ||||||
| #include <linux/err.h> | #include <linux/err.h> | ||||||
| #include <linux/sched.h> | #include <linux/sched.h> | ||||||
| #include <linux/types.h> | #include <linux/types.h> | ||||||
| #include <linux/vmalloc.h> | #include <linux/vmalloc.h> | ||||||
| 
 | 
 | ||||||
| #include "msgpool.h" | #include <linux/ceph/msgpool.h> | ||||||
| 
 | 
 | ||||||
| static void *alloc_fn(gfp_t gfp_mask, void *arg) | static void *alloc_fn(gfp_t gfp_mask, void *arg) | ||||||
| { | { | ||||||
| @ -1,17 +1,22 @@ | |||||||
| #include "ceph_debug.h" | #include <linux/ceph/ceph_debug.h> | ||||||
| 
 | 
 | ||||||
|  | #include <linux/module.h> | ||||||
| #include <linux/err.h> | #include <linux/err.h> | ||||||
| #include <linux/highmem.h> | #include <linux/highmem.h> | ||||||
| #include <linux/mm.h> | #include <linux/mm.h> | ||||||
| #include <linux/pagemap.h> | #include <linux/pagemap.h> | ||||||
| #include <linux/slab.h> | #include <linux/slab.h> | ||||||
| #include <linux/uaccess.h> | #include <linux/uaccess.h> | ||||||
|  | #ifdef CONFIG_BLOCK | ||||||
|  | #include <linux/bio.h> | ||||||
|  | #endif | ||||||
| 
 | 
 | ||||||
| #include "super.h" | #include <linux/ceph/libceph.h> | ||||||
| #include "osd_client.h" | #include <linux/ceph/osd_client.h> | ||||||
| #include "messenger.h" | #include <linux/ceph/messenger.h> | ||||||
| #include "decode.h" | #include <linux/ceph/decode.h> | ||||||
| #include "auth.h" | #include <linux/ceph/auth.h> | ||||||
|  | #include <linux/ceph/pagelist.h> | ||||||
| 
 | 
 | ||||||
| #define OSD_OP_FRONT_LEN	4096 | #define OSD_OP_FRONT_LEN	4096 | ||||||
| #define OSD_OPREPLY_FRONT_LEN	512 | #define OSD_OPREPLY_FRONT_LEN	512 | ||||||
| @ -22,6 +27,59 @@ static int __kick_requests(struct ceph_osd_client *osdc, | |||||||
| 
 | 
 | ||||||
| static void kick_requests(struct ceph_osd_client *osdc, struct ceph_osd *osd); | static void kick_requests(struct ceph_osd_client *osdc, struct ceph_osd *osd); | ||||||
| 
 | 
 | ||||||
|  | static int op_needs_trail(int op) | ||||||
|  | { | ||||||
|  | 	switch (op) { | ||||||
|  | 	case CEPH_OSD_OP_GETXATTR: | ||||||
|  | 	case CEPH_OSD_OP_SETXATTR: | ||||||
|  | 	case CEPH_OSD_OP_CMPXATTR: | ||||||
|  | 	case CEPH_OSD_OP_CALL: | ||||||
|  | 		return 1; | ||||||
|  | 	default: | ||||||
|  | 		return 0; | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static int op_has_extent(int op) | ||||||
|  | { | ||||||
|  | 	return (op == CEPH_OSD_OP_READ || | ||||||
|  | 		op == CEPH_OSD_OP_WRITE); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void ceph_calc_raw_layout(struct ceph_osd_client *osdc, | ||||||
|  | 			struct ceph_file_layout *layout, | ||||||
|  | 			u64 snapid, | ||||||
|  | 			u64 off, u64 *plen, u64 *bno, | ||||||
|  | 			struct ceph_osd_request *req, | ||||||
|  | 			struct ceph_osd_req_op *op) | ||||||
|  | { | ||||||
|  | 	struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; | ||||||
|  | 	u64 orig_len = *plen; | ||||||
|  | 	u64 objoff, objlen;    /* extent in object */ | ||||||
|  | 
 | ||||||
|  | 	reqhead->snapid = cpu_to_le64(snapid); | ||||||
|  | 
 | ||||||
|  | 	/* object extent? */ | ||||||
|  | 	ceph_calc_file_object_mapping(layout, off, plen, bno, | ||||||
|  | 				      &objoff, &objlen); | ||||||
|  | 	if (*plen < orig_len) | ||||||
|  | 		dout(" skipping last %llu, final file extent %llu~%llu\n", | ||||||
|  | 		     orig_len - *plen, off, *plen); | ||||||
|  | 
 | ||||||
|  | 	if (op_has_extent(op->op)) { | ||||||
|  | 		op->extent.offset = objoff; | ||||||
|  | 		op->extent.length = objlen; | ||||||
|  | 	} | ||||||
|  | 	req->r_num_pages = calc_pages_for(off, *plen); | ||||||
|  | 	if (op->op == CEPH_OSD_OP_WRITE) | ||||||
|  | 		op->payload_len = *plen; | ||||||
|  | 
 | ||||||
|  | 	dout("calc_layout bno=%llx %llu~%llu (%d pages)\n", | ||||||
|  | 	     *bno, objoff, objlen, req->r_num_pages); | ||||||
|  | 
 | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(ceph_calc_raw_layout); | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Implement client access to distributed object storage cluster. |  * Implement client access to distributed object storage cluster. | ||||||
|  * |  * | ||||||
| @ -48,34 +106,19 @@ static void kick_requests(struct ceph_osd_client *osdc, struct ceph_osd *osd); | |||||||
|  * fill osd op in request message. |  * fill osd op in request message. | ||||||
|  */ |  */ | ||||||
| static void calc_layout(struct ceph_osd_client *osdc, | static void calc_layout(struct ceph_osd_client *osdc, | ||||||
| 			struct ceph_vino vino, struct ceph_file_layout *layout, | 			struct ceph_vino vino, | ||||||
|  | 			struct ceph_file_layout *layout, | ||||||
| 			u64 off, u64 *plen, | 			u64 off, u64 *plen, | ||||||
| 			struct ceph_osd_request *req) | 			struct ceph_osd_request *req, | ||||||
|  | 			struct ceph_osd_req_op *op) | ||||||
| { | { | ||||||
| 	struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; |  | ||||||
| 	struct ceph_osd_op *op = (void *)(reqhead + 1); |  | ||||||
| 	u64 orig_len = *plen; |  | ||||||
| 	u64 objoff, objlen;    /* extent in object */ |  | ||||||
| 	u64 bno; | 	u64 bno; | ||||||
| 
 | 
 | ||||||
| 	reqhead->snapid = cpu_to_le64(vino.snap); | 	ceph_calc_raw_layout(osdc, layout, vino.snap, off, | ||||||
| 
 | 			     plen, &bno, req, op); | ||||||
| 	/* object extent? */ |  | ||||||
| 	ceph_calc_file_object_mapping(layout, off, plen, &bno, |  | ||||||
| 				      &objoff, &objlen); |  | ||||||
| 	if (*plen < orig_len) |  | ||||||
| 		dout(" skipping last %llu, final file extent %llu~%llu\n", |  | ||||||
| 		     orig_len - *plen, off, *plen); |  | ||||||
| 
 | 
 | ||||||
| 	sprintf(req->r_oid, "%llx.%08llx", vino.ino, bno); | 	sprintf(req->r_oid, "%llx.%08llx", vino.ino, bno); | ||||||
| 	req->r_oid_len = strlen(req->r_oid); | 	req->r_oid_len = strlen(req->r_oid); | ||||||
| 
 |  | ||||||
| 	op->extent.offset = cpu_to_le64(objoff); |  | ||||||
| 	op->extent.length = cpu_to_le64(objlen); |  | ||||||
| 	req->r_num_pages = calc_pages_for(off, *plen); |  | ||||||
| 
 |  | ||||||
| 	dout("calc_layout %s (%d) %llu~%llu (%d pages)\n", |  | ||||||
| 	     req->r_oid, req->r_oid_len, objoff, objlen, req->r_num_pages); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
| @ -101,12 +144,259 @@ void ceph_osdc_release_request(struct kref *kref) | |||||||
| 	if (req->r_own_pages) | 	if (req->r_own_pages) | ||||||
| 		ceph_release_page_vector(req->r_pages, | 		ceph_release_page_vector(req->r_pages, | ||||||
| 					 req->r_num_pages); | 					 req->r_num_pages); | ||||||
|  | #ifdef CONFIG_BLOCK | ||||||
|  | 	if (req->r_bio) | ||||||
|  | 		bio_put(req->r_bio); | ||||||
|  | #endif | ||||||
| 	ceph_put_snap_context(req->r_snapc); | 	ceph_put_snap_context(req->r_snapc); | ||||||
|  | 	if (req->r_trail) { | ||||||
|  | 		ceph_pagelist_release(req->r_trail); | ||||||
|  | 		kfree(req->r_trail); | ||||||
|  | 	} | ||||||
| 	if (req->r_mempool) | 	if (req->r_mempool) | ||||||
| 		mempool_free(req, req->r_osdc->req_mempool); | 		mempool_free(req, req->r_osdc->req_mempool); | ||||||
| 	else | 	else | ||||||
| 		kfree(req); | 		kfree(req); | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_osdc_release_request); | ||||||
|  | 
 | ||||||
|  | static int get_num_ops(struct ceph_osd_req_op *ops, int *needs_trail) | ||||||
|  | { | ||||||
|  | 	int i = 0; | ||||||
|  | 
 | ||||||
|  | 	if (needs_trail) | ||||||
|  | 		*needs_trail = 0; | ||||||
|  | 	while (ops[i].op) { | ||||||
|  | 		if (needs_trail && op_needs_trail(ops[i].op)) | ||||||
|  | 			*needs_trail = 1; | ||||||
|  | 		i++; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return i; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, | ||||||
|  | 					       int flags, | ||||||
|  | 					       struct ceph_snap_context *snapc, | ||||||
|  | 					       struct ceph_osd_req_op *ops, | ||||||
|  | 					       bool use_mempool, | ||||||
|  | 					       gfp_t gfp_flags, | ||||||
|  | 					       struct page **pages, | ||||||
|  | 					       struct bio *bio) | ||||||
|  | { | ||||||
|  | 	struct ceph_osd_request *req; | ||||||
|  | 	struct ceph_msg *msg; | ||||||
|  | 	int needs_trail; | ||||||
|  | 	int num_op = get_num_ops(ops, &needs_trail); | ||||||
|  | 	size_t msg_size = sizeof(struct ceph_osd_request_head); | ||||||
|  | 
 | ||||||
|  | 	msg_size += num_op*sizeof(struct ceph_osd_op); | ||||||
|  | 
 | ||||||
|  | 	if (use_mempool) { | ||||||
|  | 		req = mempool_alloc(osdc->req_mempool, gfp_flags); | ||||||
|  | 		memset(req, 0, sizeof(*req)); | ||||||
|  | 	} else { | ||||||
|  | 		req = kzalloc(sizeof(*req), gfp_flags); | ||||||
|  | 	} | ||||||
|  | 	if (req == NULL) | ||||||
|  | 		return NULL; | ||||||
|  | 
 | ||||||
|  | 	req->r_osdc = osdc; | ||||||
|  | 	req->r_mempool = use_mempool; | ||||||
|  | 
 | ||||||
|  | 	kref_init(&req->r_kref); | ||||||
|  | 	init_completion(&req->r_completion); | ||||||
|  | 	init_completion(&req->r_safe_completion); | ||||||
|  | 	INIT_LIST_HEAD(&req->r_unsafe_item); | ||||||
|  | 	req->r_flags = flags; | ||||||
|  | 
 | ||||||
|  | 	WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); | ||||||
|  | 
 | ||||||
|  | 	/* create reply message */ | ||||||
|  | 	if (use_mempool) | ||||||
|  | 		msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); | ||||||
|  | 	else | ||||||
|  | 		msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, | ||||||
|  | 				   OSD_OPREPLY_FRONT_LEN, gfp_flags); | ||||||
|  | 	if (!msg) { | ||||||
|  | 		ceph_osdc_put_request(req); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
|  | 	req->r_reply = msg; | ||||||
|  | 
 | ||||||
|  | 	/* allocate space for the trailing data */ | ||||||
|  | 	if (needs_trail) { | ||||||
|  | 		req->r_trail = kmalloc(sizeof(struct ceph_pagelist), gfp_flags); | ||||||
|  | 		if (!req->r_trail) { | ||||||
|  | 			ceph_osdc_put_request(req); | ||||||
|  | 			return NULL; | ||||||
|  | 		} | ||||||
|  | 		ceph_pagelist_init(req->r_trail); | ||||||
|  | 	} | ||||||
|  | 	/* create request message; allow space for oid */ | ||||||
|  | 	msg_size += 40; | ||||||
|  | 	if (snapc) | ||||||
|  | 		msg_size += sizeof(u64) * snapc->num_snaps; | ||||||
|  | 	if (use_mempool) | ||||||
|  | 		msg = ceph_msgpool_get(&osdc->msgpool_op, 0); | ||||||
|  | 	else | ||||||
|  | 		msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp_flags); | ||||||
|  | 	if (!msg) { | ||||||
|  | 		ceph_osdc_put_request(req); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP); | ||||||
|  | 	memset(msg->front.iov_base, 0, msg->front.iov_len); | ||||||
|  | 
 | ||||||
|  | 	req->r_request = msg; | ||||||
|  | 	req->r_pages = pages; | ||||||
|  | #ifdef CONFIG_BLOCK | ||||||
|  | 	if (bio) { | ||||||
|  | 		req->r_bio = bio; | ||||||
|  | 		bio_get(req->r_bio); | ||||||
|  | 	} | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | 	return req; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(ceph_osdc_alloc_request); | ||||||
|  | 
 | ||||||
|  | static void osd_req_encode_op(struct ceph_osd_request *req, | ||||||
|  | 			      struct ceph_osd_op *dst, | ||||||
|  | 			      struct ceph_osd_req_op *src) | ||||||
|  | { | ||||||
|  | 	dst->op = cpu_to_le16(src->op); | ||||||
|  | 
 | ||||||
|  | 	switch (dst->op) { | ||||||
|  | 	case CEPH_OSD_OP_READ: | ||||||
|  | 	case CEPH_OSD_OP_WRITE: | ||||||
|  | 		dst->extent.offset = | ||||||
|  | 			cpu_to_le64(src->extent.offset); | ||||||
|  | 		dst->extent.length = | ||||||
|  | 			cpu_to_le64(src->extent.length); | ||||||
|  | 		dst->extent.truncate_size = | ||||||
|  | 			cpu_to_le64(src->extent.truncate_size); | ||||||
|  | 		dst->extent.truncate_seq = | ||||||
|  | 			cpu_to_le32(src->extent.truncate_seq); | ||||||
|  | 		break; | ||||||
|  | 
 | ||||||
|  | 	case CEPH_OSD_OP_GETXATTR: | ||||||
|  | 	case CEPH_OSD_OP_SETXATTR: | ||||||
|  | 	case CEPH_OSD_OP_CMPXATTR: | ||||||
|  | 		BUG_ON(!req->r_trail); | ||||||
|  | 
 | ||||||
|  | 		dst->xattr.name_len = cpu_to_le32(src->xattr.name_len); | ||||||
|  | 		dst->xattr.value_len = cpu_to_le32(src->xattr.value_len); | ||||||
|  | 		dst->xattr.cmp_op = src->xattr.cmp_op; | ||||||
|  | 		dst->xattr.cmp_mode = src->xattr.cmp_mode; | ||||||
|  | 		ceph_pagelist_append(req->r_trail, src->xattr.name, | ||||||
|  | 				     src->xattr.name_len); | ||||||
|  | 		ceph_pagelist_append(req->r_trail, src->xattr.val, | ||||||
|  | 				     src->xattr.value_len); | ||||||
|  | 		break; | ||||||
|  | 	case CEPH_OSD_OP_CALL: | ||||||
|  | 		BUG_ON(!req->r_trail); | ||||||
|  | 
 | ||||||
|  | 		dst->cls.class_len = src->cls.class_len; | ||||||
|  | 		dst->cls.method_len = src->cls.method_len; | ||||||
|  | 		dst->cls.indata_len = cpu_to_le32(src->cls.indata_len); | ||||||
|  | 
 | ||||||
|  | 		ceph_pagelist_append(req->r_trail, src->cls.class_name, | ||||||
|  | 				     src->cls.class_len); | ||||||
|  | 		ceph_pagelist_append(req->r_trail, src->cls.method_name, | ||||||
|  | 				     src->cls.method_len); | ||||||
|  | 		ceph_pagelist_append(req->r_trail, src->cls.indata, | ||||||
|  | 				     src->cls.indata_len); | ||||||
|  | 		break; | ||||||
|  | 	case CEPH_OSD_OP_ROLLBACK: | ||||||
|  | 		dst->snap.snapid = cpu_to_le64(src->snap.snapid); | ||||||
|  | 		break; | ||||||
|  | 	case CEPH_OSD_OP_STARTSYNC: | ||||||
|  | 		break; | ||||||
|  | 	default: | ||||||
|  | 		pr_err("unrecognized osd opcode %d\n", dst->op); | ||||||
|  | 		WARN_ON(1); | ||||||
|  | 		break; | ||||||
|  | 	} | ||||||
|  | 	dst->payload_len = cpu_to_le32(src->payload_len); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * build new request AND message | ||||||
|  |  * | ||||||
|  |  */ | ||||||
|  | void ceph_osdc_build_request(struct ceph_osd_request *req, | ||||||
|  | 			     u64 off, u64 *plen, | ||||||
|  | 			     struct ceph_osd_req_op *src_ops, | ||||||
|  | 			     struct ceph_snap_context *snapc, | ||||||
|  | 			     struct timespec *mtime, | ||||||
|  | 			     const char *oid, | ||||||
|  | 			     int oid_len) | ||||||
|  | { | ||||||
|  | 	struct ceph_msg *msg = req->r_request; | ||||||
|  | 	struct ceph_osd_request_head *head; | ||||||
|  | 	struct ceph_osd_req_op *src_op; | ||||||
|  | 	struct ceph_osd_op *op; | ||||||
|  | 	void *p; | ||||||
|  | 	int num_op = get_num_ops(src_ops, NULL); | ||||||
|  | 	size_t msg_size = sizeof(*head) + num_op*sizeof(*op); | ||||||
|  | 	int flags = req->r_flags; | ||||||
|  | 	u64 data_len = 0; | ||||||
|  | 	int i; | ||||||
|  | 
 | ||||||
|  | 	head = msg->front.iov_base; | ||||||
|  | 	op = (void *)(head + 1); | ||||||
|  | 	p = (void *)(op + num_op); | ||||||
|  | 
 | ||||||
|  | 	req->r_snapc = ceph_get_snap_context(snapc); | ||||||
|  | 
 | ||||||
|  | 	head->client_inc = cpu_to_le32(1); /* always, for now. */ | ||||||
|  | 	head->flags = cpu_to_le32(flags); | ||||||
|  | 	if (flags & CEPH_OSD_FLAG_WRITE) | ||||||
|  | 		ceph_encode_timespec(&head->mtime, mtime); | ||||||
|  | 	head->num_ops = cpu_to_le16(num_op); | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 	/* fill in oid */ | ||||||
|  | 	head->object_len = cpu_to_le32(oid_len); | ||||||
|  | 	memcpy(p, oid, oid_len); | ||||||
|  | 	p += oid_len; | ||||||
|  | 
 | ||||||
|  | 	src_op = src_ops; | ||||||
|  | 	while (src_op->op) { | ||||||
|  | 		osd_req_encode_op(req, op, src_op); | ||||||
|  | 		src_op++; | ||||||
|  | 		op++; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if (req->r_trail) | ||||||
|  | 		data_len += req->r_trail->length; | ||||||
|  | 
 | ||||||
|  | 	if (snapc) { | ||||||
|  | 		head->snap_seq = cpu_to_le64(snapc->seq); | ||||||
|  | 		head->num_snaps = cpu_to_le32(snapc->num_snaps); | ||||||
|  | 		for (i = 0; i < snapc->num_snaps; i++) { | ||||||
|  | 			put_unaligned_le64(snapc->snaps[i], p); | ||||||
|  | 			p += sizeof(u64); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if (flags & CEPH_OSD_FLAG_WRITE) { | ||||||
|  | 		req->r_request->hdr.data_off = cpu_to_le16(off); | ||||||
|  | 		req->r_request->hdr.data_len = cpu_to_le32(*plen + data_len); | ||||||
|  | 	} else if (data_len) { | ||||||
|  | 		req->r_request->hdr.data_off = 0; | ||||||
|  | 		req->r_request->hdr.data_len = cpu_to_le32(data_len); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	BUG_ON(p > msg->front.iov_base + msg->front.iov_len); | ||||||
|  | 	msg_size = p - msg->front.iov_base; | ||||||
|  | 	msg->front.iov_len = msg_size; | ||||||
|  | 	msg->hdr.front_len = cpu_to_le32(msg_size); | ||||||
|  | 	return; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(ceph_osdc_build_request); | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * build new request AND message, calculate layout, and adjust file |  * build new request AND message, calculate layout, and adjust file | ||||||
| @ -131,110 +421,40 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||||||
| 					       struct timespec *mtime, | 					       struct timespec *mtime, | ||||||
| 					       bool use_mempool, int num_reply) | 					       bool use_mempool, int num_reply) | ||||||
| { | { | ||||||
|  | 	struct ceph_osd_req_op ops[3]; | ||||||
| 	struct ceph_osd_request *req; | 	struct ceph_osd_request *req; | ||||||
| 	struct ceph_msg *msg; |  | ||||||
| 	struct ceph_osd_request_head *head; |  | ||||||
| 	struct ceph_osd_op *op; |  | ||||||
| 	void *p; |  | ||||||
| 	int num_op = 1 + do_sync; |  | ||||||
| 	size_t msg_size = sizeof(*head) + num_op*sizeof(*op); |  | ||||||
| 	int i; |  | ||||||
| 
 | 
 | ||||||
| 	if (use_mempool) { | 	ops[0].op = opcode; | ||||||
| 		req = mempool_alloc(osdc->req_mempool, GFP_NOFS); | 	ops[0].extent.truncate_seq = truncate_seq; | ||||||
| 		memset(req, 0, sizeof(*req)); | 	ops[0].extent.truncate_size = truncate_size; | ||||||
| 	} else { | 	ops[0].payload_len = 0; | ||||||
| 		req = kzalloc(sizeof(*req), GFP_NOFS); |  | ||||||
| 	} |  | ||||||
| 	if (req == NULL) |  | ||||||
| 		return NULL; |  | ||||||
| 
 |  | ||||||
| 	req->r_osdc = osdc; |  | ||||||
| 	req->r_mempool = use_mempool; |  | ||||||
| 	kref_init(&req->r_kref); |  | ||||||
| 	init_completion(&req->r_completion); |  | ||||||
| 	init_completion(&req->r_safe_completion); |  | ||||||
| 	INIT_LIST_HEAD(&req->r_unsafe_item); |  | ||||||
| 	req->r_flags = flags; |  | ||||||
| 
 |  | ||||||
| 	WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); |  | ||||||
| 
 |  | ||||||
| 	/* create reply message */ |  | ||||||
| 	if (use_mempool) |  | ||||||
| 		msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); |  | ||||||
| 	else |  | ||||||
| 		msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, |  | ||||||
| 				   OSD_OPREPLY_FRONT_LEN, GFP_NOFS); |  | ||||||
| 	if (!msg) { |  | ||||||
| 		ceph_osdc_put_request(req); |  | ||||||
| 		return NULL; |  | ||||||
| 	} |  | ||||||
| 	req->r_reply = msg; |  | ||||||
| 
 |  | ||||||
| 	/* create request message; allow space for oid */ |  | ||||||
| 	msg_size += 40; |  | ||||||
| 	if (snapc) |  | ||||||
| 		msg_size += sizeof(u64) * snapc->num_snaps; |  | ||||||
| 	if (use_mempool) |  | ||||||
| 		msg = ceph_msgpool_get(&osdc->msgpool_op, 0); |  | ||||||
| 	else |  | ||||||
| 		msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, GFP_NOFS); |  | ||||||
| 	if (!msg) { |  | ||||||
| 		ceph_osdc_put_request(req); |  | ||||||
| 		return NULL; |  | ||||||
| 	} |  | ||||||
| 	msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP); |  | ||||||
| 	memset(msg->front.iov_base, 0, msg->front.iov_len); |  | ||||||
| 	head = msg->front.iov_base; |  | ||||||
| 	op = (void *)(head + 1); |  | ||||||
| 	p = (void *)(op + num_op); |  | ||||||
| 
 |  | ||||||
| 	req->r_request = msg; |  | ||||||
| 	req->r_snapc = ceph_get_snap_context(snapc); |  | ||||||
| 
 |  | ||||||
| 	head->client_inc = cpu_to_le32(1); /* always, for now. */ |  | ||||||
| 	head->flags = cpu_to_le32(flags); |  | ||||||
| 	if (flags & CEPH_OSD_FLAG_WRITE) |  | ||||||
| 		ceph_encode_timespec(&head->mtime, mtime); |  | ||||||
| 	head->num_ops = cpu_to_le16(num_op); |  | ||||||
| 	op->op = cpu_to_le16(opcode); |  | ||||||
| 
 |  | ||||||
| 	/* calculate max write size */ |  | ||||||
| 	calc_layout(osdc, vino, layout, off, plen, req); |  | ||||||
| 	req->r_file_layout = *layout;  /* keep a copy */ |  | ||||||
| 
 |  | ||||||
| 	if (flags & CEPH_OSD_FLAG_WRITE) { |  | ||||||
| 		req->r_request->hdr.data_off = cpu_to_le16(off); |  | ||||||
| 		req->r_request->hdr.data_len = cpu_to_le32(*plen); |  | ||||||
| 		op->payload_len = cpu_to_le32(*plen); |  | ||||||
| 	} |  | ||||||
| 	op->extent.truncate_size = cpu_to_le64(truncate_size); |  | ||||||
| 	op->extent.truncate_seq = cpu_to_le32(truncate_seq); |  | ||||||
| 
 |  | ||||||
| 	/* fill in oid */ |  | ||||||
| 	head->object_len = cpu_to_le32(req->r_oid_len); |  | ||||||
| 	memcpy(p, req->r_oid, req->r_oid_len); |  | ||||||
| 	p += req->r_oid_len; |  | ||||||
| 
 | 
 | ||||||
| 	if (do_sync) { | 	if (do_sync) { | ||||||
| 		op++; | 		ops[1].op = CEPH_OSD_OP_STARTSYNC; | ||||||
| 		op->op = cpu_to_le16(CEPH_OSD_OP_STARTSYNC); | 		ops[1].payload_len = 0; | ||||||
| 	} | 		ops[2].op = 0; | ||||||
| 	if (snapc) { | 	} else | ||||||
| 		head->snap_seq = cpu_to_le64(snapc->seq); | 		ops[1].op = 0; | ||||||
| 		head->num_snaps = cpu_to_le32(snapc->num_snaps); | 
 | ||||||
| 		for (i = 0; i < snapc->num_snaps; i++) { | 	req = ceph_osdc_alloc_request(osdc, flags, | ||||||
| 			put_unaligned_le64(snapc->snaps[i], p); | 					 snapc, ops, | ||||||
| 			p += sizeof(u64); | 					 use_mempool, | ||||||
| 		} | 					 GFP_NOFS, NULL, NULL); | ||||||
| 	} | 	if (IS_ERR(req)) | ||||||
|  | 		return req; | ||||||
|  | 
 | ||||||
|  | 	/* calculate max write size */ | ||||||
|  | 	calc_layout(osdc, vino, layout, off, plen, req, ops); | ||||||
|  | 	req->r_file_layout = *layout;  /* keep a copy */ | ||||||
|  | 
 | ||||||
|  | 	ceph_osdc_build_request(req, off, plen, ops, | ||||||
|  | 				snapc, | ||||||
|  | 				mtime, | ||||||
|  | 				req->r_oid, req->r_oid_len); | ||||||
| 
 | 
 | ||||||
| 	BUG_ON(p > msg->front.iov_base + msg->front.iov_len); |  | ||||||
| 	msg_size = p - msg->front.iov_base; |  | ||||||
| 	msg->front.iov_len = msg_size; |  | ||||||
| 	msg->hdr.front_len = cpu_to_le32(msg_size); |  | ||||||
| 	return req; | 	return req; | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_osdc_new_request); | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * We keep osd requests in an rbtree, sorted by ->r_tid. |  * We keep osd requests in an rbtree, sorted by ->r_tid. | ||||||
| @ -389,7 +609,7 @@ static void __move_osd_to_lru(struct ceph_osd_client *osdc, | |||||||
| 	dout("__move_osd_to_lru %p\n", osd); | 	dout("__move_osd_to_lru %p\n", osd); | ||||||
| 	BUG_ON(!list_empty(&osd->o_osd_lru)); | 	BUG_ON(!list_empty(&osd->o_osd_lru)); | ||||||
| 	list_add_tail(&osd->o_osd_lru, &osdc->osd_lru); | 	list_add_tail(&osd->o_osd_lru, &osdc->osd_lru); | ||||||
| 	osd->lru_ttl = jiffies + osdc->client->mount_args->osd_idle_ttl * HZ; | 	osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void __remove_osd_from_lru(struct ceph_osd *osd) | static void __remove_osd_from_lru(struct ceph_osd *osd) | ||||||
| @ -483,7 +703,7 @@ static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o) | |||||||
| static void __schedule_osd_timeout(struct ceph_osd_client *osdc) | static void __schedule_osd_timeout(struct ceph_osd_client *osdc) | ||||||
| { | { | ||||||
| 	schedule_delayed_work(&osdc->timeout_work, | 	schedule_delayed_work(&osdc->timeout_work, | ||||||
| 			osdc->client->mount_args->osd_keepalive_timeout * HZ); | 			osdc->client->options->osd_keepalive_timeout * HZ); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void __cancel_osd_timeout(struct ceph_osd_client *osdc) | static void __cancel_osd_timeout(struct ceph_osd_client *osdc) | ||||||
| @ -684,9 +904,9 @@ static void handle_timeout(struct work_struct *work) | |||||||
| 		container_of(work, struct ceph_osd_client, timeout_work.work); | 		container_of(work, struct ceph_osd_client, timeout_work.work); | ||||||
| 	struct ceph_osd_request *req, *last_req = NULL; | 	struct ceph_osd_request *req, *last_req = NULL; | ||||||
| 	struct ceph_osd *osd; | 	struct ceph_osd *osd; | ||||||
| 	unsigned long timeout = osdc->client->mount_args->osd_timeout * HZ; | 	unsigned long timeout = osdc->client->options->osd_timeout * HZ; | ||||||
| 	unsigned long keepalive = | 	unsigned long keepalive = | ||||||
| 		osdc->client->mount_args->osd_keepalive_timeout * HZ; | 		osdc->client->options->osd_keepalive_timeout * HZ; | ||||||
| 	unsigned long last_stamp = 0; | 	unsigned long last_stamp = 0; | ||||||
| 	struct rb_node *p; | 	struct rb_node *p; | ||||||
| 	struct list_head slow_osds; | 	struct list_head slow_osds; | ||||||
| @ -773,7 +993,7 @@ static void handle_osds_timeout(struct work_struct *work) | |||||||
| 		container_of(work, struct ceph_osd_client, | 		container_of(work, struct ceph_osd_client, | ||||||
| 			     osds_timeout_work.work); | 			     osds_timeout_work.work); | ||||||
| 	unsigned long delay = | 	unsigned long delay = | ||||||
| 		osdc->client->mount_args->osd_idle_ttl * HZ >> 2; | 		osdc->client->options->osd_idle_ttl * HZ >> 2; | ||||||
| 
 | 
 | ||||||
| 	dout("osds timeout\n"); | 	dout("osds timeout\n"); | ||||||
| 	down_read(&osdc->map_sem); | 	down_read(&osdc->map_sem); | ||||||
| @ -1104,6 +1324,10 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, | |||||||
| 
 | 
 | ||||||
| 	req->r_request->pages = req->r_pages; | 	req->r_request->pages = req->r_pages; | ||||||
| 	req->r_request->nr_pages = req->r_num_pages; | 	req->r_request->nr_pages = req->r_num_pages; | ||||||
|  | #ifdef CONFIG_BLOCK | ||||||
|  | 	req->r_request->bio = req->r_bio; | ||||||
|  | #endif | ||||||
|  | 	req->r_request->trail = req->r_trail; | ||||||
| 
 | 
 | ||||||
| 	register_request(osdc, req); | 	register_request(osdc, req); | ||||||
| 
 | 
 | ||||||
| @ -1131,6 +1355,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, | |||||||
| 	up_read(&osdc->map_sem); | 	up_read(&osdc->map_sem); | ||||||
| 	return rc; | 	return rc; | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_osdc_start_request); | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * wait for a request to complete |  * wait for a request to complete | ||||||
| @ -1153,6 +1378,7 @@ int ceph_osdc_wait_request(struct ceph_osd_client *osdc, | |||||||
| 	dout("wait_request tid %llu result %d\n", req->r_tid, req->r_result); | 	dout("wait_request tid %llu result %d\n", req->r_tid, req->r_result); | ||||||
| 	return req->r_result; | 	return req->r_result; | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_osdc_wait_request); | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * sync - wait for all in-flight requests to flush.  avoid starvation. |  * sync - wait for all in-flight requests to flush.  avoid starvation. | ||||||
| @ -1186,6 +1412,7 @@ void ceph_osdc_sync(struct ceph_osd_client *osdc) | |||||||
| 	mutex_unlock(&osdc->request_mutex); | 	mutex_unlock(&osdc->request_mutex); | ||||||
| 	dout("sync done (thru tid %llu)\n", last_tid); | 	dout("sync done (thru tid %llu)\n", last_tid); | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_osdc_sync); | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * init, shutdown |  * init, shutdown | ||||||
| @ -1211,7 +1438,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) | |||||||
| 	INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout); | 	INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout); | ||||||
| 
 | 
 | ||||||
| 	schedule_delayed_work(&osdc->osds_timeout_work, | 	schedule_delayed_work(&osdc->osds_timeout_work, | ||||||
| 	   round_jiffies_relative(osdc->client->mount_args->osd_idle_ttl * HZ)); | 	   round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ)); | ||||||
| 
 | 
 | ||||||
| 	err = -ENOMEM; | 	err = -ENOMEM; | ||||||
| 	osdc->req_mempool = mempool_create_kmalloc_pool(10, | 	osdc->req_mempool = mempool_create_kmalloc_pool(10, | ||||||
| @ -1237,6 +1464,7 @@ out_mempool: | |||||||
| out: | out: | ||||||
| 	return err; | 	return err; | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_osdc_init); | ||||||
| 
 | 
 | ||||||
| void ceph_osdc_stop(struct ceph_osd_client *osdc) | void ceph_osdc_stop(struct ceph_osd_client *osdc) | ||||||
| { | { | ||||||
| @ -1251,6 +1479,7 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc) | |||||||
| 	ceph_msgpool_destroy(&osdc->msgpool_op); | 	ceph_msgpool_destroy(&osdc->msgpool_op); | ||||||
| 	ceph_msgpool_destroy(&osdc->msgpool_op_reply); | 	ceph_msgpool_destroy(&osdc->msgpool_op_reply); | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_osdc_stop); | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Read some contiguous pages.  If we cross a stripe boundary, shorten |  * Read some contiguous pages.  If we cross a stripe boundary, shorten | ||||||
| @ -1288,6 +1517,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, | |||||||
| 	dout("readpages result %d\n", rc); | 	dout("readpages result %d\n", rc); | ||||||
| 	return rc; | 	return rc; | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_osdc_readpages); | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * do a synchronous write on N pages |  * do a synchronous write on N pages | ||||||
| @ -1330,6 +1560,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, | |||||||
| 	dout("writepages result %d\n", rc); | 	dout("writepages result %d\n", rc); | ||||||
| 	return rc; | 	return rc; | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_osdc_writepages); | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * handle incoming message |  * handle incoming message | ||||||
| @ -1420,6 +1651,9 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||||||
| 		} | 		} | ||||||
| 		m->pages = req->r_pages; | 		m->pages = req->r_pages; | ||||||
| 		m->nr_pages = req->r_num_pages; | 		m->nr_pages = req->r_num_pages; | ||||||
|  | #ifdef CONFIG_BLOCK | ||||||
|  | 		m->bio = req->r_bio; | ||||||
|  | #endif | ||||||
| 	} | 	} | ||||||
| 	*skip = 0; | 	*skip = 0; | ||||||
| 	req->r_con_filling_msg = ceph_con_get(con); | 	req->r_con_filling_msg = ceph_con_get(con); | ||||||
| @ -1,14 +1,15 @@ | |||||||
| 
 | 
 | ||||||
| #include "ceph_debug.h" | #include <linux/ceph/ceph_debug.h> | ||||||
| 
 | 
 | ||||||
|  | #include <linux/module.h> | ||||||
| #include <linux/slab.h> | #include <linux/slab.h> | ||||||
| #include <asm/div64.h> | #include <asm/div64.h> | ||||||
| 
 | 
 | ||||||
| #include "super.h" | #include <linux/ceph/libceph.h> | ||||||
| #include "osdmap.h" | #include <linux/ceph/osdmap.h> | ||||||
| #include "crush/hash.h" | #include <linux/ceph/decode.h> | ||||||
| #include "crush/mapper.h" | #include <linux/crush/hash.h> | ||||||
| #include "decode.h" | #include <linux/crush/mapper.h> | ||||||
| 
 | 
 | ||||||
| char *ceph_osdmap_state_str(char *str, int len, int state) | char *ceph_osdmap_state_str(char *str, int len, int state) | ||||||
| { | { | ||||||
| @ -417,6 +418,20 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) | |||||||
| 	return NULL; | 	return NULL; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name) | ||||||
|  | { | ||||||
|  | 	struct rb_node *rbp; | ||||||
|  | 
 | ||||||
|  | 	for (rbp = rb_first(&map->pg_pools); rbp; rbp = rb_next(rbp)) { | ||||||
|  | 		struct ceph_pg_pool_info *pi = | ||||||
|  | 			rb_entry(rbp, struct ceph_pg_pool_info, node); | ||||||
|  | 		if (pi->name && strcmp(pi->name, name) == 0) | ||||||
|  | 			return pi->id; | ||||||
|  | 	} | ||||||
|  | 	return -ENOENT; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(ceph_pg_poolid_by_name); | ||||||
|  | 
 | ||||||
| static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) | static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) | ||||||
| { | { | ||||||
| 	rb_erase(&pi->node, root); | 	rb_erase(&pi->node, root); | ||||||
| @ -966,6 +981,7 @@ void ceph_calc_file_object_mapping(struct ceph_file_layout *layout, | |||||||
| 
 | 
 | ||||||
| 	dout(" obj extent %llu~%llu\n", *oxoff, *oxlen); | 	dout(" obj extent %llu~%llu\n", *oxoff, *oxlen); | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_calc_file_object_mapping); | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * calculate an object layout (i.e. pgid) from an oid, |  * calculate an object layout (i.e. pgid) from an oid, | ||||||
| @ -1011,6 +1027,7 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol, | |||||||
| 	ol->ol_stripe_unit = fl->fl_object_stripe_unit; | 	ol->ol_stripe_unit = fl->fl_object_stripe_unit; | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_calc_object_layout); | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Calculate raw osd vector for the given pgid.  Return pointer to osd |  * Calculate raw osd vector for the given pgid.  Return pointer to osd | ||||||
| @ -1108,3 +1125,4 @@ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) | |||||||
| 			return osds[i]; | 			return osds[i]; | ||||||
| 	return -1; | 	return -1; | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(ceph_calc_pg_primary); | ||||||
							
								
								
									
										154
									
								
								net/ceph/pagelist.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										154
									
								
								net/ceph/pagelist.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,154 @@ | |||||||
|  | 
 | ||||||
|  | #include <linux/module.h> | ||||||
|  | #include <linux/gfp.h> | ||||||
|  | #include <linux/pagemap.h> | ||||||
|  | #include <linux/highmem.h> | ||||||
|  | #include <linux/ceph/pagelist.h> | ||||||
|  | 
 | ||||||
|  | static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl) | ||||||
|  | { | ||||||
|  | 	if (pl->mapped_tail) { | ||||||
|  | 		struct page *page = list_entry(pl->head.prev, struct page, lru); | ||||||
|  | 		kunmap(page); | ||||||
|  | 		pl->mapped_tail = NULL; | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | int ceph_pagelist_release(struct ceph_pagelist *pl) | ||||||
|  | { | ||||||
|  | 	ceph_pagelist_unmap_tail(pl); | ||||||
|  | 	while (!list_empty(&pl->head)) { | ||||||
|  | 		struct page *page = list_first_entry(&pl->head, struct page, | ||||||
|  | 						     lru); | ||||||
|  | 		list_del(&page->lru); | ||||||
|  | 		__free_page(page); | ||||||
|  | 	} | ||||||
|  | 	ceph_pagelist_free_reserve(pl); | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(ceph_pagelist_release); | ||||||
|  | 
 | ||||||
|  | static int ceph_pagelist_addpage(struct ceph_pagelist *pl) | ||||||
|  | { | ||||||
|  | 	struct page *page; | ||||||
|  | 
 | ||||||
|  | 	if (!pl->num_pages_free) { | ||||||
|  | 		page = __page_cache_alloc(GFP_NOFS); | ||||||
|  | 	} else { | ||||||
|  | 		page = list_first_entry(&pl->free_list, struct page, lru); | ||||||
|  | 		list_del(&page->lru); | ||||||
|  | 		--pl->num_pages_free; | ||||||
|  | 	} | ||||||
|  | 	if (!page) | ||||||
|  | 		return -ENOMEM; | ||||||
|  | 	pl->room += PAGE_SIZE; | ||||||
|  | 	ceph_pagelist_unmap_tail(pl); | ||||||
|  | 	list_add_tail(&page->lru, &pl->head); | ||||||
|  | 	pl->mapped_tail = kmap(page); | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | int ceph_pagelist_append(struct ceph_pagelist *pl, const void *buf, size_t len) | ||||||
|  | { | ||||||
|  | 	while (pl->room < len) { | ||||||
|  | 		size_t bit = pl->room; | ||||||
|  | 		int ret; | ||||||
|  | 
 | ||||||
|  | 		memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), | ||||||
|  | 		       buf, bit); | ||||||
|  | 		pl->length += bit; | ||||||
|  | 		pl->room -= bit; | ||||||
|  | 		buf += bit; | ||||||
|  | 		len -= bit; | ||||||
|  | 		ret = ceph_pagelist_addpage(pl); | ||||||
|  | 		if (ret) | ||||||
|  | 			return ret; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), buf, len); | ||||||
|  | 	pl->length += len; | ||||||
|  | 	pl->room -= len; | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(ceph_pagelist_append); | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * Allocate enough pages for a pagelist to append the given amount | ||||||
|  |  * of data without without allocating. | ||||||
|  |  * Returns: 0 on success, -ENOMEM on error. | ||||||
|  |  */ | ||||||
|  | int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space) | ||||||
|  | { | ||||||
|  | 	if (space <= pl->room) | ||||||
|  | 		return 0; | ||||||
|  | 	space -= pl->room; | ||||||
|  | 	space = (space + PAGE_SIZE - 1) >> PAGE_SHIFT;   /* conv to num pages */ | ||||||
|  | 
 | ||||||
|  | 	while (space > pl->num_pages_free) { | ||||||
|  | 		struct page *page = __page_cache_alloc(GFP_NOFS); | ||||||
|  | 		if (!page) | ||||||
|  | 			return -ENOMEM; | ||||||
|  | 		list_add_tail(&page->lru, &pl->free_list); | ||||||
|  | 		++pl->num_pages_free; | ||||||
|  | 	} | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(ceph_pagelist_reserve); | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * Free any pages that have been preallocated. | ||||||
|  |  */ | ||||||
|  | int ceph_pagelist_free_reserve(struct ceph_pagelist *pl) | ||||||
|  | { | ||||||
|  | 	while (!list_empty(&pl->free_list)) { | ||||||
|  | 		struct page *page = list_first_entry(&pl->free_list, | ||||||
|  | 						     struct page, lru); | ||||||
|  | 		list_del(&page->lru); | ||||||
|  | 		__free_page(page); | ||||||
|  | 		--pl->num_pages_free; | ||||||
|  | 	} | ||||||
|  | 	BUG_ON(pl->num_pages_free); | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(ceph_pagelist_free_reserve); | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * Create a truncation point. | ||||||
|  |  */ | ||||||
|  | void ceph_pagelist_set_cursor(struct ceph_pagelist *pl, | ||||||
|  | 			      struct ceph_pagelist_cursor *c) | ||||||
|  | { | ||||||
|  | 	c->pl = pl; | ||||||
|  | 	c->page_lru = pl->head.prev; | ||||||
|  | 	c->room = pl->room; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(ceph_pagelist_set_cursor); | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * Truncate a pagelist to the given point. Move extra pages to reserve. | ||||||
|  |  * This won't sleep. | ||||||
|  |  * Returns: 0 on success, | ||||||
|  |  *          -EINVAL if the pagelist doesn't match the trunc point pagelist | ||||||
|  |  */ | ||||||
|  | int ceph_pagelist_truncate(struct ceph_pagelist *pl, | ||||||
|  | 			   struct ceph_pagelist_cursor *c) | ||||||
|  | { | ||||||
|  | 	struct page *page; | ||||||
|  | 
 | ||||||
|  | 	if (pl != c->pl) | ||||||
|  | 		return -EINVAL; | ||||||
|  | 	ceph_pagelist_unmap_tail(pl); | ||||||
|  | 	while (pl->head.prev != c->page_lru) { | ||||||
|  | 		page = list_entry(pl->head.prev, struct page, lru); | ||||||
|  | 		list_del(&page->lru);                /* remove from pagelist */ | ||||||
|  | 		list_add_tail(&page->lru, &pl->free_list); /* add to reserve */ | ||||||
|  | 		++pl->num_pages_free; | ||||||
|  | 	} | ||||||
|  | 	pl->room = c->room; | ||||||
|  | 	if (!list_empty(&pl->head)) { | ||||||
|  | 		page = list_entry(pl->head.prev, struct page, lru); | ||||||
|  | 		pl->mapped_tail = kmap(page); | ||||||
|  | 	} | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(ceph_pagelist_truncate); | ||||||
							
								
								
									
										223
									
								
								net/ceph/pagevec.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										223
									
								
								net/ceph/pagevec.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,223 @@ | |||||||
|  | #include <linux/ceph/ceph_debug.h> | ||||||
|  | 
 | ||||||
|  | #include <linux/module.h> | ||||||
|  | #include <linux/sched.h> | ||||||
|  | #include <linux/slab.h> | ||||||
|  | #include <linux/file.h> | ||||||
|  | #include <linux/namei.h> | ||||||
|  | #include <linux/writeback.h> | ||||||
|  | 
 | ||||||
|  | #include <linux/ceph/libceph.h> | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * build a vector of user pages | ||||||
|  |  */ | ||||||
|  | struct page **ceph_get_direct_page_vector(const char __user *data, | ||||||
|  | 						 int num_pages, | ||||||
|  | 						 loff_t off, size_t len) | ||||||
|  | { | ||||||
|  | 	struct page **pages; | ||||||
|  | 	int rc; | ||||||
|  | 
 | ||||||
|  | 	pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); | ||||||
|  | 	if (!pages) | ||||||
|  | 		return ERR_PTR(-ENOMEM); | ||||||
|  | 
 | ||||||
|  | 	down_read(¤t->mm->mmap_sem); | ||||||
|  | 	rc = get_user_pages(current, current->mm, (unsigned long)data, | ||||||
|  | 			    num_pages, 0, 0, pages, NULL); | ||||||
|  | 	up_read(¤t->mm->mmap_sem); | ||||||
|  | 	if (rc < 0) | ||||||
|  | 		goto fail; | ||||||
|  | 	return pages; | ||||||
|  | 
 | ||||||
|  | fail: | ||||||
|  | 	kfree(pages); | ||||||
|  | 	return ERR_PTR(rc); | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(ceph_get_direct_page_vector); | ||||||
|  | 
 | ||||||
/*
 * Drop one reference on each of the first @num_pages pages in @pages,
 * then free the array itself.
 */
void ceph_put_page_vector(struct page **pages, int num_pages)
{
	int idx = 0;

	while (idx < num_pages) {
		put_page(pages[idx]);
		idx++;
	}
	kfree(pages);
}
EXPORT_SYMBOL(ceph_put_page_vector);
|  | 
 | ||||||
/*
 * Free the first @num_pages (order-0) pages in @pages, then free the
 * array itself.
 */
void ceph_release_page_vector(struct page **pages, int num_pages)
{
	int idx = 0;

	while (idx < num_pages) {
		__free_pages(pages[idx], 0);
		idx++;
	}
	kfree(pages);
}
EXPORT_SYMBOL(ceph_release_page_vector);
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * allocate a vector new pages | ||||||
|  |  */ | ||||||
|  | struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) | ||||||
|  | { | ||||||
|  | 	struct page **pages; | ||||||
|  | 	int i; | ||||||
|  | 
 | ||||||
|  | 	pages = kmalloc(sizeof(*pages) * num_pages, flags); | ||||||
|  | 	if (!pages) | ||||||
|  | 		return ERR_PTR(-ENOMEM); | ||||||
|  | 	for (i = 0; i < num_pages; i++) { | ||||||
|  | 		pages[i] = __page_cache_alloc(flags); | ||||||
|  | 		if (pages[i] == NULL) { | ||||||
|  | 			ceph_release_page_vector(pages, i); | ||||||
|  | 			return ERR_PTR(-ENOMEM); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return pages; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(ceph_alloc_page_vector); | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * copy user data into a page vector | ||||||
|  |  */ | ||||||
|  | int ceph_copy_user_to_page_vector(struct page **pages, | ||||||
|  | 					 const char __user *data, | ||||||
|  | 					 loff_t off, size_t len) | ||||||
|  | { | ||||||
|  | 	int i = 0; | ||||||
|  | 	int po = off & ~PAGE_CACHE_MASK; | ||||||
|  | 	int left = len; | ||||||
|  | 	int l, bad; | ||||||
|  | 
 | ||||||
|  | 	while (left > 0) { | ||||||
|  | 		l = min_t(int, PAGE_CACHE_SIZE-po, left); | ||||||
|  | 		bad = copy_from_user(page_address(pages[i]) + po, data, l); | ||||||
|  | 		if (bad == l) | ||||||
|  | 			return -EFAULT; | ||||||
|  | 		data += l - bad; | ||||||
|  | 		left -= l - bad; | ||||||
|  | 		po += l - bad; | ||||||
|  | 		if (po == PAGE_CACHE_SIZE) { | ||||||
|  | 			po = 0; | ||||||
|  | 			i++; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return len; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(ceph_copy_user_to_page_vector); | ||||||
|  | 
 | ||||||
|  | int ceph_copy_to_page_vector(struct page **pages, | ||||||
|  | 				    const char *data, | ||||||
|  | 				    loff_t off, size_t len) | ||||||
|  | { | ||||||
|  | 	int i = 0; | ||||||
|  | 	size_t po = off & ~PAGE_CACHE_MASK; | ||||||
|  | 	size_t left = len; | ||||||
|  | 	size_t l; | ||||||
|  | 
 | ||||||
|  | 	while (left > 0) { | ||||||
|  | 		l = min_t(size_t, PAGE_CACHE_SIZE-po, left); | ||||||
|  | 		memcpy(page_address(pages[i]) + po, data, l); | ||||||
|  | 		data += l; | ||||||
|  | 		left -= l; | ||||||
|  | 		po += l; | ||||||
|  | 		if (po == PAGE_CACHE_SIZE) { | ||||||
|  | 			po = 0; | ||||||
|  | 			i++; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return len; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(ceph_copy_to_page_vector); | ||||||
|  | 
 | ||||||
|  | int ceph_copy_from_page_vector(struct page **pages, | ||||||
|  | 				    char *data, | ||||||
|  | 				    loff_t off, size_t len) | ||||||
|  | { | ||||||
|  | 	int i = 0; | ||||||
|  | 	size_t po = off & ~PAGE_CACHE_MASK; | ||||||
|  | 	size_t left = len; | ||||||
|  | 	size_t l; | ||||||
|  | 
 | ||||||
|  | 	while (left > 0) { | ||||||
|  | 		l = min_t(size_t, PAGE_CACHE_SIZE-po, left); | ||||||
|  | 		memcpy(data, page_address(pages[i]) + po, l); | ||||||
|  | 		data += l; | ||||||
|  | 		left -= l; | ||||||
|  | 		po += l; | ||||||
|  | 		if (po == PAGE_CACHE_SIZE) { | ||||||
|  | 			po = 0; | ||||||
|  | 			i++; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return len; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(ceph_copy_from_page_vector); | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * copy user data from a page vector into a user pointer | ||||||
|  |  */ | ||||||
|  | int ceph_copy_page_vector_to_user(struct page **pages, | ||||||
|  | 					 char __user *data, | ||||||
|  | 					 loff_t off, size_t len) | ||||||
|  | { | ||||||
|  | 	int i = 0; | ||||||
|  | 	int po = off & ~PAGE_CACHE_MASK; | ||||||
|  | 	int left = len; | ||||||
|  | 	int l, bad; | ||||||
|  | 
 | ||||||
|  | 	while (left > 0) { | ||||||
|  | 		l = min_t(int, left, PAGE_CACHE_SIZE-po); | ||||||
|  | 		bad = copy_to_user(data, page_address(pages[i]) + po, l); | ||||||
|  | 		if (bad == l) | ||||||
|  | 			return -EFAULT; | ||||||
|  | 		data += l - bad; | ||||||
|  | 		left -= l - bad; | ||||||
|  | 		if (po) { | ||||||
|  | 			po += l - bad; | ||||||
|  | 			if (po == PAGE_CACHE_SIZE) | ||||||
|  | 				po = 0; | ||||||
|  | 		} | ||||||
|  | 		i++; | ||||||
|  | 	} | ||||||
|  | 	return len; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(ceph_copy_page_vector_to_user); | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Zero an extent within a page vector.  Offset is relative to the | ||||||
|  |  * start of the first page. | ||||||
|  |  */ | ||||||
|  | void ceph_zero_page_vector_range(int off, int len, struct page **pages) | ||||||
|  | { | ||||||
|  | 	int i = off >> PAGE_CACHE_SHIFT; | ||||||
|  | 
 | ||||||
|  | 	off &= ~PAGE_CACHE_MASK; | ||||||
|  | 
 | ||||||
|  | 	dout("zero_page_vector_page %u~%u\n", off, len); | ||||||
|  | 
 | ||||||
|  | 	/* leading partial page? */ | ||||||
|  | 	if (off) { | ||||||
|  | 		int end = min((int)PAGE_CACHE_SIZE, off + len); | ||||||
|  | 		dout("zeroing %d %p head from %d\n", i, pages[i], | ||||||
|  | 		     (int)off); | ||||||
|  | 		zero_user_segment(pages[i], off, end); | ||||||
|  | 		len -= (end - off); | ||||||
|  | 		i++; | ||||||
|  | 	} | ||||||
|  | 	while (len >= PAGE_CACHE_SIZE) { | ||||||
|  | 		dout("zeroing %d %p len=%d\n", i, pages[i], len); | ||||||
|  | 		zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE); | ||||||
|  | 		len -= PAGE_CACHE_SIZE; | ||||||
|  | 		i++; | ||||||
|  | 	} | ||||||
|  | 	/* trailing partial page? */ | ||||||
|  | 	if (len) { | ||||||
|  | 		dout("zeroing %d %p tail to %d\n", i, pages[i], (int)len); | ||||||
|  | 		zero_user_segment(pages[i], 0, len); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(ceph_zero_page_vector_range); | ||||||
|  | 
 | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user