From 757856d2b9568a701df9ea6a4be68effbb9d6f44 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 25 Jun 2015 17:47:45 +0300 Subject: [PATCH 1/4] libceph: enable ceph in a non-default network namespace Grab a reference on a network namespace of the 'rbd map' (in case of rbd) or 'mount' (in case of ceph) process and use that to open sockets instead of always using init_net and bailing if network namespace is anything but init_net. Be careful to not share struct ceph_client instances between different namespaces and don't add any code in the !CONFIG_NET_NS case. This is based on a patch from Hong Zhiguo . Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/ceph/messenger.h | 3 +++ net/ceph/ceph_common.c | 16 ++++++++++------ net/ceph/messenger.c | 10 +++++++++- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index e15499422fdc..37753278987a 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -56,6 +57,7 @@ struct ceph_messenger { struct ceph_entity_addr my_enc_addr; atomic_t stopping; + possible_net_t net; bool nocrc; bool tcp_nodelay; @@ -267,6 +269,7 @@ extern void ceph_messenger_init(struct ceph_messenger *msgr, u64 required_features, bool nocrc, bool tcp_nodelay); +extern void ceph_messenger_fini(struct ceph_messenger *msgr); extern void ceph_con_init(struct ceph_connection *con, void *private, const struct ceph_connection_operations *ops, diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index cb7db320dd27..f30329f72641 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -16,8 +17,6 @@ #include #include #include -#include -#include #include @@ -131,6 +130,13 @@ int ceph_compare_options(struct ceph_options *new_opt, int i; int ret; + /* + * Don't bother 
comparing options if network namespaces don't + * match. + */ + if (!net_eq(current->nsproxy->net_ns, read_pnet(&client->msgr.net))) + return -1; + ret = memcmp(opt1, opt2, ofs); if (ret) return ret; @@ -335,9 +341,6 @@ ceph_parse_options(char *options, const char *dev_name, int err = -ENOMEM; substring_t argstr[MAX_OPT_ARGS]; - if (current->nsproxy->net_ns != &init_net) - return ERR_PTR(-EINVAL); - opt = kzalloc(sizeof(*opt), GFP_KERNEL); if (!opt) return ERR_PTR(-ENOMEM); @@ -608,6 +611,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private, fail_monc: ceph_monc_stop(&client->monc); fail: + ceph_messenger_fini(&client->msgr); kfree(client); return ERR_PTR(err); } @@ -621,8 +625,8 @@ void ceph_destroy_client(struct ceph_client *client) /* unmount */ ceph_osdc_stop(&client->osdc); - ceph_monc_stop(&client->monc); + ceph_messenger_fini(&client->msgr); ceph_debugfs_client_cleanup(client); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 1679f47280e2..5c1f98ea6741 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -479,7 +480,7 @@ static int ceph_tcp_connect(struct ceph_connection *con) int ret; BUG_ON(con->sock); - ret = sock_create_kern(&init_net, con->peer_addr.in_addr.ss_family, + ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family, SOCK_STREAM, IPPROTO_TCP, &sock); if (ret) return ret; @@ -2944,11 +2945,18 @@ void ceph_messenger_init(struct ceph_messenger *msgr, msgr->tcp_nodelay = tcp_nodelay; atomic_set(&msgr->stopping, 0); + write_pnet(&msgr->net, get_net(current->nsproxy->net_ns)); dout("%s %p\n", __func__, msgr); } EXPORT_SYMBOL(ceph_messenger_init); +void ceph_messenger_fini(struct ceph_messenger *msgr) +{ + put_net(read_pnet(&msgr->net)); +} +EXPORT_SYMBOL(ceph_messenger_fini); + static void clear_standby(struct ceph_connection *con) { /* come back from STANDBY? 
*/ From c44bd69c0c8cfadf0239437635b2933efb1f6c4c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 9 Jul 2015 13:57:52 +0300 Subject: [PATCH 2/4] libceph: treat sockaddr_storage with uninitialized family as blank addr_is_blank() should return true if family is neither AF_INET nor AF_INET6. This is what its counterpart entity_addr_t::is_blank_ip() is doing and it is the right thing to do: in process_banner() we check if our address is blank and if it is, "learn" it from our peer. As it is, we never learn our address and always send out a blank one. This goes way back to ceph.git commit dd732cbfc1c9 ("use sockaddr_storage; and some ipv6 support groundwork") from 2009. While at it, do not open-code ipv6_addr_any() and use INADDR_ANY constant instead of 0. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/messenger.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 5c1f98ea6741..e3be1d22a247 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1732,17 +1732,17 @@ static int verify_hello(struct ceph_connection *con) static bool addr_is_blank(struct sockaddr_storage *ss) { + struct in_addr *addr = &((struct sockaddr_in *)ss)->sin_addr; + struct in6_addr *addr6 = &((struct sockaddr_in6 *)ss)->sin6_addr; + switch (ss->ss_family) { case AF_INET: - return ((struct sockaddr_in *)ss)->sin_addr.s_addr == 0; + return addr->s_addr == htonl(INADDR_ANY); case AF_INET6: - return - ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[0] == 0 && - ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[1] == 0 && - ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[2] == 0 && - ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[3] == 0; + return ipv6_addr_any(addr6); + default: + return true; } - return false; } static int addr_port(struct sockaddr_storage *ss) From 398ecff5a562b7b69f77582f98a4b04d0de1f066 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 9 Jul 2015 11:46:14 -0400 
Subject: [PATCH 3/4] MAINTAINERS: update ceph entries - The Ceph common code is used by both fs/ceph and drivers/block/rbd. Add a separate maintainers entry. - Add Ilya as libceph maintainer and cephfs submaintainer. - Attribute Documentation/ABI/testing/sysfs-bus-rbd to rbd. - ceph-devel@vger.kernel.org should be L, not M in rbd entry. Signed-off-by: Sage Weil Signed-off-by: Ilya Dryomov --- MAINTAINERS | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8133cefb6b6e..58cbc2118e46 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2562,19 +2562,29 @@ F: arch/powerpc/include/uapi/asm/spu*.h F: arch/powerpc/oprofile/*cell* F: arch/powerpc/platforms/cell/ -CEPH DISTRIBUTED FILE SYSTEM CLIENT +CEPH COMMON CODE (LIBCEPH) +M: Ilya Dryomov M: "Yan, Zheng" M: Sage Weil L: ceph-devel@vger.kernel.org W: http://ceph.com/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git S: Supported -F: Documentation/filesystems/ceph.txt -F: fs/ceph/ F: net/ceph/ F: include/linux/ceph/ F: include/linux/crush/ +CEPH DISTRIBUTED FILE SYSTEM CLIENT (CEPH) +M: "Yan, Zheng" +M: Sage Weil +M: Ilya Dryomov +L: ceph-devel@vger.kernel.org +W: http://ceph.com/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git +S: Supported +F: Documentation/filesystems/ceph.txt +F: fs/ceph/ + CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM: L: linux-usb@vger.kernel.org S: Orphan @@ -8366,10 +8376,11 @@ RADOS BLOCK DEVICE (RBD) M: Ilya Dryomov M: Sage Weil M: Alex Elder -M: ceph-devel@vger.kernel.org +L: ceph-devel@vger.kernel.org W: http://ceph.com/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git S: Supported +F: Documentation/ABI/testing/sysfs-bus-rbd F: drivers/block/rbd.c F: drivers/block/rbd_types.h From 6e67b7ae2157bdeb6b56381e530fc74bb68b6149 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 9 Jul 2015 11:47:37 -0400 Subject: [PATCH 4/4] MAINTAINERS: add secondary tree for ceph 
modules The Ceph kernel code is primarily developed in the github tree, and only pushed to the korg tree before going to Linus. If Sage is unavailable and another maintainer needs to push something upstream, pull requests may originate from the github tree instead of Sage's korg tree. Signed-off-by: Sage Weil Signed-off-by: Ilya Dryomov --- MAINTAINERS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 58cbc2118e46..0d70760e8135 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2569,6 +2569,7 @@ M: Sage Weil L: ceph-devel@vger.kernel.org W: http://ceph.com/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git +T: git git://github.com/ceph/ceph-client.git S: Supported F: net/ceph/ F: include/linux/ceph/ @@ -2581,6 +2582,7 @@ M: Ilya Dryomov L: ceph-devel@vger.kernel.org W: http://ceph.com/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git +T: git git://github.com/ceph/ceph-client.git S: Supported F: Documentation/filesystems/ceph.txt F: fs/ceph/ @@ -8379,6 +8381,7 @@ M: Alex Elder L: ceph-devel@vger.kernel.org W: http://ceph.com/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git +T: git git://github.com/ceph/ceph-client.git S: Supported F: Documentation/ABI/testing/sysfs-bus-rbd F: drivers/block/rbd.c