linux/net/sunrpc/svcauth_unix.c

979 lines
23 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/svcauth.h>
#include <linux/sunrpc/gss_api.h>
#include <linux/sunrpc/addr.h>
#include <linux/err.h>
#include <linux/seq_file.h>
#include <linux/hash.h>
#include <linux/string.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 08:04:11 +00:00
#include <linux/slab.h>
[PATCH] knfsd: knfsd: cache ipmap per TCP socket Speed up high call-rate workloads by caching the struct ip_map for the peer on the connected struct svc_sock instead of looking it up in the ip_map cache hashtable on every call. This helps workloads using AUTH_SYS authentication over TCP. Testing was on a 4 CPU 4 NIC Altix using 4 IRIX clients, each with 16 synthetic client threads simulating an rsync (i.e. recursive directory listing) workload reading from an i386 RH9 install image (161480 regular files in 10841 directories) on the server. That tree is small enough to fill in the server's RAM so no disk traffic was involved. This setup gives a sustained call rate in excess of 60000 calls/sec before being CPU-bound on the server. Profiling showed strcmp(), called from ip_map_match(), was taking 4.8% of each CPU, and ip_map_lookup() was taking 2.9%. This patch drops both contribution into the profile noise. Note that the above result overstates this value of this patch for most workloads. The synthetic clients are all using separate IP addresses, so there are 64 entries in the ip_map cache hash. Because the kernel measured contained the bug fixed in commit commit 1f1e030bf75774b6a283518e1534d598e14147d4 and was running on 64bit little-endian machine, probably all of those 64 entries were on a single chain, thus increasing the cost of ip_map_lookup(). With a modern kernel you would need more clients to see the same amount of performance improvement. This patch has helped to scale knfsd to handle a deployment with 2000 NFS clients. Signed-off-by: Greg Banks <gnb@melbourne.sgi.com> Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-04 09:15:50 +00:00
#include <net/sock.h>
#include <net/ipv6.h>
#include <linux/kernel.h>
#include <linux/user_namespace.h>
#define RPCDBG_FACILITY RPCDBG_AUTH
#include "netns.h"
/*
* AUTHUNIX and AUTHNULL credentials are both handled here.
* AUTHNULL is treated just like AUTHUNIX except that the uid/gid
* are always nobody (-2). i.e. we do the same IP address checks for
* AUTHNULL as for AUTHUNIX, and that is done here.
*/
struct unix_domain {
struct auth_domain h;
/* other stuff later */
};
extern struct auth_ops svcauth_null;
extern struct auth_ops svcauth_unix;
extern struct auth_ops svcauth_tls;
static void svcauth_unix_domain_release_rcu(struct rcu_head *head)
{
struct auth_domain *dom = container_of(head, struct auth_domain, rcu_head);
struct unix_domain *ud = container_of(dom, struct unix_domain, h);
kfree(dom->name);
kfree(ud);
}
static void svcauth_unix_domain_release(struct auth_domain *dom)
{
call_rcu(&dom->rcu_head, svcauth_unix_domain_release_rcu);
}
struct auth_domain *unix_domain_find(char *name)
{
struct auth_domain *rv;
struct unix_domain *new = NULL;
rv = auth_domain_find(name);
while(1) {
if (rv) {
if (new && rv != &new->h)
svcauth_unix_domain_release(&new->h);
if (rv->flavour != &svcauth_unix) {
auth_domain_put(rv);
return NULL;
}
return rv;
}
new = kmalloc(sizeof(*new), GFP_KERNEL);
if (new == NULL)
return NULL;
kref_init(&new->h.ref);
new->h.name = kstrdup(name, GFP_KERNEL);
if (new->h.name == NULL) {
kfree(new);
return NULL;
}
new->h.flavour = &svcauth_unix;
rv = auth_domain_lookup(name, &new->h);
}
}
EXPORT_SYMBOL_GPL(unix_domain_find);
/**************************************************
* cache for IP address to unix_domain
* as needed by AUTH_UNIX
*/
#define IP_HASHBITS 8
#define IP_HASHMAX (1<<IP_HASHBITS)
struct ip_map {
struct cache_head h;
char m_class[8]; /* e.g. "nfsd" */
struct in6_addr m_addr;
struct unix_domain *m_client;
struct rcu_head m_rcu;
};
static void ip_map_put(struct kref *kref)
{
struct cache_head *item = container_of(kref, struct cache_head, ref);
struct ip_map *im = container_of(item, struct ip_map,h);
if (test_bit(CACHE_VALID, &item->flags) &&
!test_bit(CACHE_NEGATIVE, &item->flags))
auth_domain_put(&im->m_client->h);
kfree_rcu(im, m_rcu);
}
static inline int hash_ip6(const struct in6_addr *ip)
{
return hash_32(ipv6_addr_hash(ip), IP_HASHBITS);
}
static int ip_map_match(struct cache_head *corig, struct cache_head *cnew)
{
struct ip_map *orig = container_of(corig, struct ip_map, h);
struct ip_map *new = container_of(cnew, struct ip_map, h);
return strcmp(orig->m_class, new->m_class) == 0 &&
ipv6_addr_equal(&orig->m_addr, &new->m_addr);
}
static void ip_map_init(struct cache_head *cnew, struct cache_head *citem)
{
struct ip_map *new = container_of(cnew, struct ip_map, h);
struct ip_map *item = container_of(citem, struct ip_map, h);
strcpy(new->m_class, item->m_class);
new->m_addr = item->m_addr;
}
static void update(struct cache_head *cnew, struct cache_head *citem)
{
struct ip_map *new = container_of(cnew, struct ip_map, h);
struct ip_map *item = container_of(citem, struct ip_map, h);
kref_get(&item->m_client->h.ref);
new->m_client = item->m_client;
}
static struct cache_head *ip_map_alloc(void)
{
struct ip_map *i = kmalloc(sizeof(*i), GFP_KERNEL);
if (i)
return &i->h;
else
return NULL;
}
static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h)
{
return sunrpc_cache_pipe_upcall(cd, h);
}
static void ip_map_request(struct cache_detail *cd,
struct cache_head *h,
char **bpp, int *blen)
{
char text_addr[40];
struct ip_map *im = container_of(h, struct ip_map, h);
if (ipv6_addr_v4mapped(&(im->m_addr))) {
snprintf(text_addr, 20, "%pI4", &im->m_addr.s6_addr32[3]);
} else {
snprintf(text_addr, 40, "%pI6", &im->m_addr);
}
qword_add(bpp, blen, im->m_class);
qword_add(bpp, blen, text_addr);
(*bpp)[-1] = '\n';
}
static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, struct in6_addr *addr);
static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, struct unix_domain *udom, time64_t expiry);
static int ip_map_parse(struct cache_detail *cd,
char *mesg, int mlen)
{
/* class ipaddress [domainname] */
/* should be safe just to use the start of the input buffer
* for scratch: */
char *buf = mesg;
int len;
char class[8];
union {
struct sockaddr sa;
struct sockaddr_in s4;
struct sockaddr_in6 s6;
} address;
struct sockaddr_in6 sin6;
int err;
struct ip_map *ipmp;
struct auth_domain *dom;
time64_t expiry;
if (mesg[mlen-1] != '\n')
return -EINVAL;
mesg[mlen-1] = 0;
/* class */
len = qword_get(&mesg, class, sizeof(class));
if (len <= 0) return -EINVAL;
/* ip address */
len = qword_get(&mesg, buf, mlen);
if (len <= 0) return -EINVAL;
if (rpc_pton(cd->net, buf, len, &address.sa, sizeof(address)) == 0)
return -EINVAL;
switch (address.sa.sa_family) {
case AF_INET:
/* Form a mapped IPv4 address in sin6 */
sin6.sin6_family = AF_INET6;
ipv6_addr_set_v4mapped(address.s4.sin_addr.s_addr,
&sin6.sin6_addr);
break;
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
memcpy(&sin6, &address.s6, sizeof(sin6));
break;
#endif
default:
return -EINVAL;
}
expiry = get_expiry(&mesg);
if (expiry ==0)
return -EINVAL;
/* domainname, or empty for NEGATIVE */
len = qword_get(&mesg, buf, mlen);
if (len < 0) return -EINVAL;
if (len) {
dom = unix_domain_find(buf);
if (dom == NULL)
return -ENOENT;
} else
dom = NULL;
/* IPv6 scope IDs are ignored for now */
ipmp = __ip_map_lookup(cd, class, &sin6.sin6_addr);
if (ipmp) {
err = __ip_map_update(cd, ipmp,
container_of(dom, struct unix_domain, h),
expiry);
} else
err = -ENOMEM;
if (dom)
auth_domain_put(dom);
cache_flush();
return err;
}
static int ip_map_show(struct seq_file *m,
struct cache_detail *cd,
struct cache_head *h)
{
struct ip_map *im;
struct in6_addr addr;
char *dom = "-no-domain-";
if (h == NULL) {
seq_puts(m, "#class IP domain\n");
return 0;
}
im = container_of(h, struct ip_map, h);
/* class addr domain */
addr = im->m_addr;
if (test_bit(CACHE_VALID, &h->flags) &&
!test_bit(CACHE_NEGATIVE, &h->flags))
dom = im->m_client->h.name;
if (ipv6_addr_v4mapped(&addr)) {
seq_printf(m, "%s %pI4 %s\n",
im->m_class, &addr.s6_addr32[3], dom);
} else {
seq_printf(m, "%s %pI6 %s\n", im->m_class, &addr, dom);
}
return 0;
}
static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class,
struct in6_addr *addr)
{
struct ip_map ip;
struct cache_head *ch;
strcpy(ip.m_class, class);
ip.m_addr = *addr;
ch = sunrpc_cache_lookup_rcu(cd, &ip.h,
hash_str(class, IP_HASHBITS) ^
hash_ip6(addr));
if (ch)
return container_of(ch, struct ip_map, h);
else
return NULL;
}
static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm,
struct unix_domain *udom, time64_t expiry)
{
struct ip_map ip;
struct cache_head *ch;
ip.m_client = udom;
ip.h.flags = 0;
if (!udom)
set_bit(CACHE_NEGATIVE, &ip.h.flags);
ip.h.expiry_time = expiry;
ch = sunrpc_cache_update(cd, &ip.h, &ipm->h,
hash_str(ipm->m_class, IP_HASHBITS) ^
hash_ip6(&ipm->m_addr));
if (!ch)
return -ENOMEM;
cache_put(ch, cd);
return 0;
}
void svcauth_unix_purge(struct net *net)
{
struct sunrpc_net *sn;
sn = net_generic(net, sunrpc_net_id);
cache_purge(sn->ip_map_cache);
}
EXPORT_SYMBOL_GPL(svcauth_unix_purge);
[PATCH] knfsd: knfsd: cache ipmap per TCP socket Speed up high call-rate workloads by caching the struct ip_map for the peer on the connected struct svc_sock instead of looking it up in the ip_map cache hashtable on every call. This helps workloads using AUTH_SYS authentication over TCP. Testing was on a 4 CPU 4 NIC Altix using 4 IRIX clients, each with 16 synthetic client threads simulating an rsync (i.e. recursive directory listing) workload reading from an i386 RH9 install image (161480 regular files in 10841 directories) on the server. That tree is small enough to fill in the server's RAM so no disk traffic was involved. This setup gives a sustained call rate in excess of 60000 calls/sec before being CPU-bound on the server. Profiling showed strcmp(), called from ip_map_match(), was taking 4.8% of each CPU, and ip_map_lookup() was taking 2.9%. This patch drops both contribution into the profile noise. Note that the above result overstates this value of this patch for most workloads. The synthetic clients are all using separate IP addresses, so there are 64 entries in the ip_map cache hash. Because the kernel measured contained the bug fixed in commit commit 1f1e030bf75774b6a283518e1534d598e14147d4 and was running on 64bit little-endian machine, probably all of those 64 entries were on a single chain, thus increasing the cost of ip_map_lookup(). With a modern kernel you would need more clients to see the same amount of performance improvement. This patch has helped to scale knfsd to handle a deployment with 2000 NFS clients. Signed-off-by: Greg Banks <gnb@melbourne.sgi.com> Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-04 09:15:50 +00:00
static inline struct ip_map *
ip_map_cached_get(struct svc_xprt *xprt)
[PATCH] knfsd: knfsd: cache ipmap per TCP socket Speed up high call-rate workloads by caching the struct ip_map for the peer on the connected struct svc_sock instead of looking it up in the ip_map cache hashtable on every call. This helps workloads using AUTH_SYS authentication over TCP. Testing was on a 4 CPU 4 NIC Altix using 4 IRIX clients, each with 16 synthetic client threads simulating an rsync (i.e. recursive directory listing) workload reading from an i386 RH9 install image (161480 regular files in 10841 directories) on the server. That tree is small enough to fill in the server's RAM so no disk traffic was involved. This setup gives a sustained call rate in excess of 60000 calls/sec before being CPU-bound on the server. Profiling showed strcmp(), called from ip_map_match(), was taking 4.8% of each CPU, and ip_map_lookup() was taking 2.9%. This patch drops both contribution into the profile noise. Note that the above result overstates this value of this patch for most workloads. The synthetic clients are all using separate IP addresses, so there are 64 entries in the ip_map cache hash. Because the kernel measured contained the bug fixed in commit commit 1f1e030bf75774b6a283518e1534d598e14147d4 and was running on 64bit little-endian machine, probably all of those 64 entries were on a single chain, thus increasing the cost of ip_map_lookup(). With a modern kernel you would need more clients to see the same amount of performance improvement. This patch has helped to scale knfsd to handle a deployment with 2000 NFS clients. Signed-off-by: Greg Banks <gnb@melbourne.sgi.com> Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-04 09:15:50 +00:00
{
struct ip_map *ipm = NULL;
struct sunrpc_net *sn;
if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) {
spin_lock(&xprt->xpt_lock);
ipm = xprt->xpt_auth_cache;
if (ipm != NULL) {
sn = net_generic(xprt->xpt_net, sunrpc_net_id);
if (cache_is_expired(sn->ip_map_cache, &ipm->h)) {
/*
* The entry has been invalidated since it was
* remembered, e.g. by a second mount from the
* same IP address.
*/
xprt->xpt_auth_cache = NULL;
spin_unlock(&xprt->xpt_lock);
cache_put(&ipm->h, sn->ip_map_cache);
return NULL;
}
cache_get(&ipm->h);
[PATCH] knfsd: knfsd: cache ipmap per TCP socket Speed up high call-rate workloads by caching the struct ip_map for the peer on the connected struct svc_sock instead of looking it up in the ip_map cache hashtable on every call. This helps workloads using AUTH_SYS authentication over TCP. Testing was on a 4 CPU 4 NIC Altix using 4 IRIX clients, each with 16 synthetic client threads simulating an rsync (i.e. recursive directory listing) workload reading from an i386 RH9 install image (161480 regular files in 10841 directories) on the server. That tree is small enough to fill in the server's RAM so no disk traffic was involved. This setup gives a sustained call rate in excess of 60000 calls/sec before being CPU-bound on the server. Profiling showed strcmp(), called from ip_map_match(), was taking 4.8% of each CPU, and ip_map_lookup() was taking 2.9%. This patch drops both contribution into the profile noise. Note that the above result overstates this value of this patch for most workloads. The synthetic clients are all using separate IP addresses, so there are 64 entries in the ip_map cache hash. Because the kernel measured contained the bug fixed in commit commit 1f1e030bf75774b6a283518e1534d598e14147d4 and was running on 64bit little-endian machine, probably all of those 64 entries were on a single chain, thus increasing the cost of ip_map_lookup(). With a modern kernel you would need more clients to see the same amount of performance improvement. This patch has helped to scale knfsd to handle a deployment with 2000 NFS clients. Signed-off-by: Greg Banks <gnb@melbourne.sgi.com> Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-04 09:15:50 +00:00
}
spin_unlock(&xprt->xpt_lock);
[PATCH] knfsd: knfsd: cache ipmap per TCP socket Speed up high call-rate workloads by caching the struct ip_map for the peer on the connected struct svc_sock instead of looking it up in the ip_map cache hashtable on every call. This helps workloads using AUTH_SYS authentication over TCP. Testing was on a 4 CPU 4 NIC Altix using 4 IRIX clients, each with 16 synthetic client threads simulating an rsync (i.e. recursive directory listing) workload reading from an i386 RH9 install image (161480 regular files in 10841 directories) on the server. That tree is small enough to fill in the server's RAM so no disk traffic was involved. This setup gives a sustained call rate in excess of 60000 calls/sec before being CPU-bound on the server. Profiling showed strcmp(), called from ip_map_match(), was taking 4.8% of each CPU, and ip_map_lookup() was taking 2.9%. This patch drops both contribution into the profile noise. Note that the above result overstates this value of this patch for most workloads. The synthetic clients are all using separate IP addresses, so there are 64 entries in the ip_map cache hash. Because the kernel measured contained the bug fixed in commit commit 1f1e030bf75774b6a283518e1534d598e14147d4 and was running on 64bit little-endian machine, probably all of those 64 entries were on a single chain, thus increasing the cost of ip_map_lookup(). With a modern kernel you would need more clients to see the same amount of performance improvement. This patch has helped to scale knfsd to handle a deployment with 2000 NFS clients. Signed-off-by: Greg Banks <gnb@melbourne.sgi.com> Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-04 09:15:50 +00:00
}
return ipm;
}
static inline void
ip_map_cached_put(struct svc_xprt *xprt, struct ip_map *ipm)
[PATCH] knfsd: knfsd: cache ipmap per TCP socket Speed up high call-rate workloads by caching the struct ip_map for the peer on the connected struct svc_sock instead of looking it up in the ip_map cache hashtable on every call. This helps workloads using AUTH_SYS authentication over TCP. Testing was on a 4 CPU 4 NIC Altix using 4 IRIX clients, each with 16 synthetic client threads simulating an rsync (i.e. recursive directory listing) workload reading from an i386 RH9 install image (161480 regular files in 10841 directories) on the server. That tree is small enough to fill in the server's RAM so no disk traffic was involved. This setup gives a sustained call rate in excess of 60000 calls/sec before being CPU-bound on the server. Profiling showed strcmp(), called from ip_map_match(), was taking 4.8% of each CPU, and ip_map_lookup() was taking 2.9%. This patch drops both contribution into the profile noise. Note that the above result overstates this value of this patch for most workloads. The synthetic clients are all using separate IP addresses, so there are 64 entries in the ip_map cache hash. Because the kernel measured contained the bug fixed in commit commit 1f1e030bf75774b6a283518e1534d598e14147d4 and was running on 64bit little-endian machine, probably all of those 64 entries were on a single chain, thus increasing the cost of ip_map_lookup(). With a modern kernel you would need more clients to see the same amount of performance improvement. This patch has helped to scale knfsd to handle a deployment with 2000 NFS clients. Signed-off-by: Greg Banks <gnb@melbourne.sgi.com> Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-04 09:15:50 +00:00
{
if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) {
spin_lock(&xprt->xpt_lock);
if (xprt->xpt_auth_cache == NULL) {
/* newly cached, keep the reference */
xprt->xpt_auth_cache = ipm;
ipm = NULL;
}
spin_unlock(&xprt->xpt_lock);
}
if (ipm) {
struct sunrpc_net *sn;
sn = net_generic(xprt->xpt_net, sunrpc_net_id);
cache_put(&ipm->h, sn->ip_map_cache);
}
[PATCH] knfsd: knfsd: cache ipmap per TCP socket Speed up high call-rate workloads by caching the struct ip_map for the peer on the connected struct svc_sock instead of looking it up in the ip_map cache hashtable on every call. This helps workloads using AUTH_SYS authentication over TCP. Testing was on a 4 CPU 4 NIC Altix using 4 IRIX clients, each with 16 synthetic client threads simulating an rsync (i.e. recursive directory listing) workload reading from an i386 RH9 install image (161480 regular files in 10841 directories) on the server. That tree is small enough to fill in the server's RAM so no disk traffic was involved. This setup gives a sustained call rate in excess of 60000 calls/sec before being CPU-bound on the server. Profiling showed strcmp(), called from ip_map_match(), was taking 4.8% of each CPU, and ip_map_lookup() was taking 2.9%. This patch drops both contribution into the profile noise. Note that the above result overstates this value of this patch for most workloads. The synthetic clients are all using separate IP addresses, so there are 64 entries in the ip_map cache hash. Because the kernel measured contained the bug fixed in commit commit 1f1e030bf75774b6a283518e1534d598e14147d4 and was running on 64bit little-endian machine, probably all of those 64 entries were on a single chain, thus increasing the cost of ip_map_lookup(). With a modern kernel you would need more clients to see the same amount of performance improvement. This patch has helped to scale knfsd to handle a deployment with 2000 NFS clients. Signed-off-by: Greg Banks <gnb@melbourne.sgi.com> Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-04 09:15:50 +00:00
}
void
svcauth_unix_info_release(struct svc_xprt *xpt)
[PATCH] knfsd: knfsd: cache ipmap per TCP socket Speed up high call-rate workloads by caching the struct ip_map for the peer on the connected struct svc_sock instead of looking it up in the ip_map cache hashtable on every call. This helps workloads using AUTH_SYS authentication over TCP. Testing was on a 4 CPU 4 NIC Altix using 4 IRIX clients, each with 16 synthetic client threads simulating an rsync (i.e. recursive directory listing) workload reading from an i386 RH9 install image (161480 regular files in 10841 directories) on the server. That tree is small enough to fill in the server's RAM so no disk traffic was involved. This setup gives a sustained call rate in excess of 60000 calls/sec before being CPU-bound on the server. Profiling showed strcmp(), called from ip_map_match(), was taking 4.8% of each CPU, and ip_map_lookup() was taking 2.9%. This patch drops both contribution into the profile noise. Note that the above result overstates this value of this patch for most workloads. The synthetic clients are all using separate IP addresses, so there are 64 entries in the ip_map cache hash. Because the kernel measured contained the bug fixed in commit commit 1f1e030bf75774b6a283518e1534d598e14147d4 and was running on 64bit little-endian machine, probably all of those 64 entries were on a single chain, thus increasing the cost of ip_map_lookup(). With a modern kernel you would need more clients to see the same amount of performance improvement. This patch has helped to scale knfsd to handle a deployment with 2000 NFS clients. Signed-off-by: Greg Banks <gnb@melbourne.sgi.com> Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-04 09:15:50 +00:00
{
struct ip_map *ipm;
ipm = xpt->xpt_auth_cache;
if (ipm != NULL) {
struct sunrpc_net *sn;
sn = net_generic(xpt->xpt_net, sunrpc_net_id);
cache_put(&ipm->h, sn->ip_map_cache);
}
[PATCH] knfsd: knfsd: cache ipmap per TCP socket Speed up high call-rate workloads by caching the struct ip_map for the peer on the connected struct svc_sock instead of looking it up in the ip_map cache hashtable on every call. This helps workloads using AUTH_SYS authentication over TCP. Testing was on a 4 CPU 4 NIC Altix using 4 IRIX clients, each with 16 synthetic client threads simulating an rsync (i.e. recursive directory listing) workload reading from an i386 RH9 install image (161480 regular files in 10841 directories) on the server. That tree is small enough to fill in the server's RAM so no disk traffic was involved. This setup gives a sustained call rate in excess of 60000 calls/sec before being CPU-bound on the server. Profiling showed strcmp(), called from ip_map_match(), was taking 4.8% of each CPU, and ip_map_lookup() was taking 2.9%. This patch drops both contribution into the profile noise. Note that the above result overstates this value of this patch for most workloads. The synthetic clients are all using separate IP addresses, so there are 64 entries in the ip_map cache hash. Because the kernel measured contained the bug fixed in commit commit 1f1e030bf75774b6a283518e1534d598e14147d4 and was running on 64bit little-endian machine, probably all of those 64 entries were on a single chain, thus increasing the cost of ip_map_lookup(). With a modern kernel you would need more clients to see the same amount of performance improvement. This patch has helped to scale knfsd to handle a deployment with 2000 NFS clients. Signed-off-by: Greg Banks <gnb@melbourne.sgi.com> Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-04 09:15:50 +00:00
}
/****************************************************************************
* auth.unix.gid cache
* simple cache to map a UID to a list of GIDs
* because AUTH_UNIX aka AUTH_SYS has a max of UNX_NGROUPS
*/
#define GID_HASHBITS 8
#define GID_HASHMAX (1<<GID_HASHBITS)
struct unix_gid {
struct cache_head h;
kuid_t uid;
struct group_info *gi;
struct rcu_head rcu;
};
static int unix_gid_hash(kuid_t uid)
{
return hash_long(from_kuid(&init_user_ns, uid), GID_HASHBITS);
}
static void unix_gid_put(struct kref *kref)
{
struct cache_head *item = container_of(kref, struct cache_head, ref);
struct unix_gid *ug = container_of(item, struct unix_gid, h);
if (test_bit(CACHE_VALID, &item->flags) &&
!test_bit(CACHE_NEGATIVE, &item->flags))
put_group_info(ug->gi);
kfree_rcu(ug, rcu);
}
static int unix_gid_match(struct cache_head *corig, struct cache_head *cnew)
{
struct unix_gid *orig = container_of(corig, struct unix_gid, h);
struct unix_gid *new = container_of(cnew, struct unix_gid, h);
return uid_eq(orig->uid, new->uid);
}
static void unix_gid_init(struct cache_head *cnew, struct cache_head *citem)
{
struct unix_gid *new = container_of(cnew, struct unix_gid, h);
struct unix_gid *item = container_of(citem, struct unix_gid, h);
new->uid = item->uid;
}
static void unix_gid_update(struct cache_head *cnew, struct cache_head *citem)
{
struct unix_gid *new = container_of(cnew, struct unix_gid, h);
struct unix_gid *item = container_of(citem, struct unix_gid, h);
get_group_info(item->gi);
new->gi = item->gi;
}
static struct cache_head *unix_gid_alloc(void)
{
struct unix_gid *g = kmalloc(sizeof(*g), GFP_KERNEL);
if (g)
return &g->h;
else
return NULL;
}
static int unix_gid_upcall(struct cache_detail *cd, struct cache_head *h)
{
return sunrpc_cache_pipe_upcall_timeout(cd, h);
}
static void unix_gid_request(struct cache_detail *cd,
struct cache_head *h,
char **bpp, int *blen)
{
char tuid[20];
struct unix_gid *ug = container_of(h, struct unix_gid, h);
snprintf(tuid, 20, "%u", from_kuid(&init_user_ns, ug->uid));
qword_add(bpp, blen, tuid);
(*bpp)[-1] = '\n';
}
static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, kuid_t uid);
static int unix_gid_parse(struct cache_detail *cd,
char *mesg, int mlen)
{
/* uid expiry Ngid gid0 gid1 ... gidN-1 */
int id;
kuid_t uid;
int gids;
int rv;
int i;
int err;
time64_t expiry;
struct unix_gid ug, *ugp;
if (mesg[mlen - 1] != '\n')
return -EINVAL;
mesg[mlen-1] = 0;
rv = get_int(&mesg, &id);
if (rv)
return -EINVAL;
uid = make_kuid(current_user_ns(), id);
ug.uid = uid;
expiry = get_expiry(&mesg);
if (expiry == 0)
return -EINVAL;
rv = get_int(&mesg, &gids);
if (rv || gids < 0 || gids > 8192)
return -EINVAL;
ug.gi = groups_alloc(gids);
if (!ug.gi)
return -ENOMEM;
for (i = 0 ; i < gids ; i++) {
int gid;
kgid_t kgid;
rv = get_int(&mesg, &gid);
err = -EINVAL;
if (rv)
goto out;
kgid = make_kgid(current_user_ns(), gid);
if (!gid_valid(kgid))
goto out;
cred: simpler, 1D supplementary groups Current supplementary groups code can massively overallocate memory and is implemented in a way so that access to individual gid is done via 2D array. If number of gids is <= 32, memory allocation is more or less tolerable (140/148 bytes). But if it is not, code allocates full page (!) regardless and, what's even more fun, doesn't reuse small 32-entry array. 2D array means dependent shifts, loads and LEAs without possibility to optimize them (gid is never known at compile time). All of the above is unnecessary. Switch to the usual trailing-zero-len-array scheme. Memory is allocated with kmalloc/vmalloc() and only as much as needed. Accesses become simpler (LEA 8(gi,idx,4) or even without displacement). Maximum number of gids is 65536 which translates to 256KB+8 bytes. I think kernel can handle such allocation. On my usual desktop system with whole 9 (nine) aux groups, struct group_info shrinks from 148 bytes to 44 bytes, yay! Nice side effects: - "gi->gid[i]" is shorter than "GROUP_AT(gi, i)", less typing, - fix little mess in net/ipv4/ping.c should have been using GROUP_AT macro but this point becomes moot, - aux group allocation is persistent and should be accounted as such. Link: http://lkml.kernel.org/r/20160817201927.GA2096@p183.telecom.by Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com> Cc: Vasily Kulikov <segoon@openwall.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-10-08 00:03:12 +00:00
ug.gi->gid[i] = kgid;
}
groups_sort(ug.gi);
ugp = unix_gid_lookup(cd, uid);
if (ugp) {
struct cache_head *ch;
ug.h.flags = 0;
ug.h.expiry_time = expiry;
ch = sunrpc_cache_update(cd,
&ug.h, &ugp->h,
unix_gid_hash(uid));
if (!ch)
err = -ENOMEM;
else {
err = 0;
cache_put(ch, cd);
}
} else
err = -ENOMEM;
out:
if (ug.gi)
put_group_info(ug.gi);
return err;
}
static int unix_gid_show(struct seq_file *m,
struct cache_detail *cd,
struct cache_head *h)
{
struct user_namespace *user_ns = m->file->f_cred->user_ns;
struct unix_gid *ug;
int i;
int glen;
if (h == NULL) {
seq_puts(m, "#uid cnt: gids...\n");
return 0;
}
ug = container_of(h, struct unix_gid, h);
if (test_bit(CACHE_VALID, &h->flags) &&
!test_bit(CACHE_NEGATIVE, &h->flags))
glen = ug->gi->ngroups;
else
glen = 0;
seq_printf(m, "%u %d:", from_kuid_munged(user_ns, ug->uid), glen);
for (i = 0; i < glen; i++)
cred: simpler, 1D supplementary groups Current supplementary groups code can massively overallocate memory and is implemented in a way so that access to individual gid is done via 2D array. If number of gids is <= 32, memory allocation is more or less tolerable (140/148 bytes). But if it is not, code allocates full page (!) regardless and, what's even more fun, doesn't reuse small 32-entry array. 2D array means dependent shifts, loads and LEAs without possibility to optimize them (gid is never known at compile time). All of the above is unnecessary. Switch to the usual trailing-zero-len-array scheme. Memory is allocated with kmalloc/vmalloc() and only as much as needed. Accesses become simpler (LEA 8(gi,idx,4) or even without displacement). Maximum number of gids is 65536 which translates to 256KB+8 bytes. I think kernel can handle such allocation. On my usual desktop system with whole 9 (nine) aux groups, struct group_info shrinks from 148 bytes to 44 bytes, yay! Nice side effects: - "gi->gid[i]" is shorter than "GROUP_AT(gi, i)", less typing, - fix little mess in net/ipv4/ping.c should have been using GROUP_AT macro but this point becomes moot, - aux group allocation is persistent and should be accounted as such. Link: http://lkml.kernel.org/r/20160817201927.GA2096@p183.telecom.by Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com> Cc: Vasily Kulikov <segoon@openwall.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-10-08 00:03:12 +00:00
seq_printf(m, " %d", from_kgid_munged(user_ns, ug->gi->gid[i]));
seq_printf(m, "\n");
return 0;
}
static const struct cache_detail unix_gid_cache_template = {
.owner = THIS_MODULE,
.hash_size = GID_HASHMAX,
.name = "auth.unix.gid",
.cache_put = unix_gid_put,
.cache_upcall = unix_gid_upcall,
.cache_request = unix_gid_request,
.cache_parse = unix_gid_parse,
.cache_show = unix_gid_show,
.match = unix_gid_match,
.init = unix_gid_init,
.update = unix_gid_update,
.alloc = unix_gid_alloc,
};
int unix_gid_cache_create(struct net *net)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct cache_detail *cd;
int err;
cd = cache_create_net(&unix_gid_cache_template, net);
if (IS_ERR(cd))
return PTR_ERR(cd);
err = cache_register_net(cd, net);
if (err) {
cache_destroy_net(cd, net);
return err;
}
sn->unix_gid_cache = cd;
return 0;
}
void unix_gid_cache_destroy(struct net *net)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct cache_detail *cd = sn->unix_gid_cache;
sn->unix_gid_cache = NULL;
cache_purge(cd);
cache_unregister_net(cd, net);
cache_destroy_net(cd, net);
}
static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, kuid_t uid)
{
struct unix_gid ug;
struct cache_head *ch;
ug.uid = uid;
ch = sunrpc_cache_lookup_rcu(cd, &ug.h, unix_gid_hash(uid));
if (ch)
return container_of(ch, struct unix_gid, h);
else
return NULL;
}
static struct group_info *unix_gid_find(kuid_t uid, struct svc_rqst *rqstp)
{
struct unix_gid *ug;
struct group_info *gi;
int ret;
struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net,
sunrpc_net_id);
ug = unix_gid_lookup(sn->unix_gid_cache, uid);
if (!ug)
return ERR_PTR(-EAGAIN);
ret = cache_check(sn->unix_gid_cache, &ug->h, &rqstp->rq_chandle);
switch (ret) {
case -ENOENT:
return ERR_PTR(-ENOENT);
case -ETIMEDOUT:
return ERR_PTR(-ESHUTDOWN);
case 0:
gi = get_group_info(ug->gi);
cache_put(&ug->h, sn->unix_gid_cache);
return gi;
default:
return ERR_PTR(-EAGAIN);
}
}
knfsd: nfsd: set rq_client to ip-address-determined-domain We want it to be possible for users to restrict exports both by IP address and by pseudoflavor. The pseudoflavor information has previously been passed using special auth_domains stored in the rq_client field. After the preceding patch that stored the pseudoflavor in rq_pflavor, that's now superfluous; so now we use rq_client for the ip information, as auth_null and auth_unix do. However, we keep around the special auth_domain in the rq_gssclient field for backwards compatibility purposes, so we can still do upcalls using the old "gss/pseudoflavor" auth_domain if upcalls using the unix domain to give us an appropriate export. This allows us to continue supporting old mountd. In fact, for this first patch, we always use the "gss/pseudoflavor" auth_domain (and only it) if it is available; thus rq_client is ignored in the auth_gss case, and this patch on its own makes no change in behavior; that will be left to later patches. Note on idmap: I'm almost tempted to just replace the auth_domain in the idmap upcall by a dummy value--no version of idmapd has ever used it, and it's unlikely anyone really wants to perform idmapping differently depending on the where the client is (they may want to perform *credential* mapping differently, but that's a different matter--the idmapper just handles id's used in getattr and setattr). But I'm updating the idmapd code anyway, just out of general backwards-compatibility paranoia. Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu> Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-17 11:04:46 +00:00
int
svcauth_unix_set_client(struct svc_rqst *rqstp)
{
struct sockaddr_in *sin;
struct sockaddr_in6 *sin6, sin6_storage;
struct ip_map *ipm;
struct group_info *gi;
struct svc_cred *cred = &rqstp->rq_cred;
struct svc_xprt *xprt = rqstp->rq_xprt;
struct net *net = xprt->xpt_net;
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
switch (rqstp->rq_addr.ss_family) {
case AF_INET:
sin = svc_addr_in(rqstp);
sin6 = &sin6_storage;
ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &sin6->sin6_addr);
break;
case AF_INET6:
sin6 = svc_addr_in6(rqstp);
break;
default:
BUG();
}
rqstp->rq_client = NULL;
if (rqstp->rq_proc == 0)
goto out;
rqstp->rq_auth_stat = rpc_autherr_badcred;
ipm = ip_map_cached_get(xprt);
[PATCH] knfsd: knfsd: cache ipmap per TCP socket Speed up high call-rate workloads by caching the struct ip_map for the peer on the connected struct svc_sock instead of looking it up in the ip_map cache hashtable on every call. This helps workloads using AUTH_SYS authentication over TCP. Testing was on a 4 CPU 4 NIC Altix using 4 IRIX clients, each with 16 synthetic client threads simulating an rsync (i.e. recursive directory listing) workload reading from an i386 RH9 install image (161480 regular files in 10841 directories) on the server. That tree is small enough to fill in the server's RAM so no disk traffic was involved. This setup gives a sustained call rate in excess of 60000 calls/sec before being CPU-bound on the server. Profiling showed strcmp(), called from ip_map_match(), was taking 4.8% of each CPU, and ip_map_lookup() was taking 2.9%. This patch drops both contribution into the profile noise. Note that the above result overstates this value of this patch for most workloads. The synthetic clients are all using separate IP addresses, so there are 64 entries in the ip_map cache hash. Because the kernel measured contained the bug fixed in commit commit 1f1e030bf75774b6a283518e1534d598e14147d4 and was running on 64bit little-endian machine, probably all of those 64 entries were on a single chain, thus increasing the cost of ip_map_lookup(). With a modern kernel you would need more clients to see the same amount of performance improvement. This patch has helped to scale knfsd to handle a deployment with 2000 NFS clients. Signed-off-by: Greg Banks <gnb@melbourne.sgi.com> Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-04 09:15:50 +00:00
if (ipm == NULL)
ipm = __ip_map_lookup(sn->ip_map_cache, rqstp->rq_server->sv_program->pg_class,
&sin6->sin6_addr);
if (ipm == NULL)
return SVC_DENIED;
switch (cache_check(sn->ip_map_cache, &ipm->h, &rqstp->rq_chandle)) {
default:
BUG();
case -ETIMEDOUT:
return SVC_CLOSE;
case -EAGAIN:
return SVC_DROP;
case -ENOENT:
return SVC_DENIED;
case 0:
rqstp->rq_client = &ipm->m_client->h;
kref_get(&rqstp->rq_client->ref);
ip_map_cached_put(xprt, ipm);
break;
}
gi = unix_gid_find(cred->cr_uid, rqstp);
switch (PTR_ERR(gi)) {
case -EAGAIN:
return SVC_DROP;
case -ESHUTDOWN:
return SVC_CLOSE;
case -ENOENT:
break;
default:
put_group_info(cred->cr_group_info);
cred->cr_group_info = gi;
}
out:
rqstp->rq_auth_stat = rpc_auth_ok;
return SVC_OK;
}
EXPORT_SYMBOL_GPL(svcauth_unix_set_client);
knfsd: nfsd: set rq_client to ip-address-determined-domain We want it to be possible for users to restrict exports both by IP address and by pseudoflavor. The pseudoflavor information has previously been passed using special auth_domains stored in the rq_client field. After the preceding patch that stored the pseudoflavor in rq_pflavor, that's now superfluous; so now we use rq_client for the ip information, as auth_null and auth_unix do. However, we keep around the special auth_domain in the rq_gssclient field for backwards compatibility purposes, so we can still do upcalls using the old "gss/pseudoflavor" auth_domain if upcalls using the unix domain to give us an appropriate export. This allows us to continue supporting old mountd. In fact, for this first patch, we always use the "gss/pseudoflavor" auth_domain (and only it) if it is available; thus rq_client is ignored in the auth_gss case, and this patch on its own makes no change in behavior; that will be left to later patches. Note on idmap: I'm almost tempted to just replace the auth_domain in the idmap upcall by a dummy value--no version of idmapd has ever used it, and it's unlikely anyone really wants to perform idmapping differently depending on the where the client is (they may want to perform *credential* mapping differently, but that's a different matter--the idmapper just handles id's used in getattr and setattr). But I'm updating the idmapd code anyway, just out of general backwards-compatibility paranoia. Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu> Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-17 11:04:46 +00:00
static int
svcauth_null_accept(struct svc_rqst *rqstp)
{
struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
struct svc_cred *cred = &rqstp->rq_cred;
if (argv->iov_len < 3*4)
return SVC_GARBAGE;
if (svc_getu32(argv) != 0) {
dprintk("svc: bad null cred\n");
rqstp->rq_auth_stat = rpc_autherr_badcred;
return SVC_DENIED;
}
if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
dprintk("svc: bad null verf\n");
rqstp->rq_auth_stat = rpc_autherr_badverf;
return SVC_DENIED;
}
/* Signal that mapping to nobody uid/gid is required */
cred->cr_uid = INVALID_UID;
cred->cr_gid = INVALID_GID;
cred->cr_group_info = groups_alloc(0);
if (cred->cr_group_info == NULL)
return SVC_CLOSE; /* kmalloc failure - client must retry */
/* Put NULL verifier */
svc_putnl(resv, RPC_AUTH_NULL);
svc_putnl(resv, 0);
rqstp->rq_cred.cr_flavor = RPC_AUTH_NULL;
return SVC_OK;
}
static int
svcauth_null_release(struct svc_rqst *rqstp)
{
if (rqstp->rq_client)
auth_domain_put(rqstp->rq_client);
rqstp->rq_client = NULL;
if (rqstp->rq_cred.cr_group_info)
put_group_info(rqstp->rq_cred.cr_group_info);
rqstp->rq_cred.cr_group_info = NULL;
return 0; /* don't drop */
}
struct auth_ops svcauth_null = {
.name = "null",
.owner = THIS_MODULE,
.flavour = RPC_AUTH_NULL,
.accept = svcauth_null_accept,
.release = svcauth_null_release,
.set_client = svcauth_unix_set_client,
};
static int
svcauth_tls_accept(struct svc_rqst *rqstp)
{
struct svc_cred *cred = &rqstp->rq_cred;
struct kvec *argv = rqstp->rq_arg.head;
struct kvec *resv = rqstp->rq_res.head;
if (argv->iov_len < XDR_UNIT * 3)
return SVC_GARBAGE;
/* Call's cred length */
if (svc_getu32(argv) != xdr_zero) {
rqstp->rq_auth_stat = rpc_autherr_badcred;
return SVC_DENIED;
}
/* Call's verifier flavor and its length */
if (svc_getu32(argv) != rpc_auth_null ||
svc_getu32(argv) != xdr_zero) {
rqstp->rq_auth_stat = rpc_autherr_badverf;
return SVC_DENIED;
}
/* AUTH_TLS is not valid on non-NULL procedures */
if (rqstp->rq_proc != 0) {
rqstp->rq_auth_stat = rpc_autherr_badcred;
return SVC_DENIED;
}
/* Mapping to nobody uid/gid is required */
cred->cr_uid = INVALID_UID;
cred->cr_gid = INVALID_GID;
cred->cr_group_info = groups_alloc(0);
if (cred->cr_group_info == NULL)
return SVC_CLOSE; /* kmalloc failure - client must retry */
/* Reply's verifier */
svc_putnl(resv, RPC_AUTH_NULL);
if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) {
svc_putnl(resv, 8);
memcpy(resv->iov_base + resv->iov_len, "STARTTLS", 8);
resv->iov_len += 8;
} else
svc_putnl(resv, 0);
rqstp->rq_cred.cr_flavor = RPC_AUTH_TLS;
return SVC_OK;
}
struct auth_ops svcauth_tls = {
.name = "tls",
.owner = THIS_MODULE,
.flavour = RPC_AUTH_TLS,
.accept = svcauth_tls_accept,
.release = svcauth_null_release,
.set_client = svcauth_unix_set_client,
};
static int
svcauth_unix_accept(struct svc_rqst *rqstp)
{
struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
struct svc_cred *cred = &rqstp->rq_cred;
struct user_namespace *userns;
u32 slen, i;
int len = argv->iov_len;
if ((len -= 3*4) < 0)
return SVC_GARBAGE;
svc_getu32(argv); /* length */
svc_getu32(argv); /* time stamp */
slen = XDR_QUADLEN(svc_getnl(argv)); /* machname length */
if (slen > 64 || (len -= (slen + 3)*4) < 0)
goto badcred;
argv->iov_base = (void*)((__be32*)argv->iov_base + slen); /* skip machname */
argv->iov_len -= slen*4;
/*
* Note: we skip uid_valid()/gid_valid() checks here for
* backwards compatibility with clients that use -1 id's.
* Instead, -1 uid or gid is later mapped to the
* (export-specific) anonymous id by nfsd_setuser.
* Supplementary gid's will be left alone.
*/
userns = (rqstp->rq_xprt && rqstp->rq_xprt->xpt_cred) ?
rqstp->rq_xprt->xpt_cred->user_ns : &init_user_ns;
cred->cr_uid = make_kuid(userns, svc_getnl(argv)); /* uid */
cred->cr_gid = make_kgid(userns, svc_getnl(argv)); /* gid */
slen = svc_getnl(argv); /* gids length */
if (slen > UNX_NGROUPS || (len -= (slen + 2)*4) < 0)
goto badcred;
cred->cr_group_info = groups_alloc(slen);
if (cred->cr_group_info == NULL)
return SVC_CLOSE;
for (i = 0; i < slen; i++) {
kgid_t kgid = make_kgid(userns, svc_getnl(argv));
cred: simpler, 1D supplementary groups Current supplementary groups code can massively overallocate memory and is implemented in a way so that access to individual gid is done via 2D array. If number of gids is <= 32, memory allocation is more or less tolerable (140/148 bytes). But if it is not, code allocates full page (!) regardless and, what's even more fun, doesn't reuse small 32-entry array. 2D array means dependent shifts, loads and LEAs without possibility to optimize them (gid is never known at compile time). All of the above is unnecessary. Switch to the usual trailing-zero-len-array scheme. Memory is allocated with kmalloc/vmalloc() and only as much as needed. Accesses become simpler (LEA 8(gi,idx,4) or even without displacement). Maximum number of gids is 65536 which translates to 256KB+8 bytes. I think kernel can handle such allocation. On my usual desktop system with whole 9 (nine) aux groups, struct group_info shrinks from 148 bytes to 44 bytes, yay! Nice side effects: - "gi->gid[i]" is shorter than "GROUP_AT(gi, i)", less typing, - fix little mess in net/ipv4/ping.c should have been using GROUP_AT macro but this point becomes moot, - aux group allocation is persistent and should be accounted as such. Link: http://lkml.kernel.org/r/20160817201927.GA2096@p183.telecom.by Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com> Cc: Vasily Kulikov <segoon@openwall.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-10-08 00:03:12 +00:00
cred->cr_group_info->gid[i] = kgid;
}
groups_sort(cred->cr_group_info);
if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
rqstp->rq_auth_stat = rpc_autherr_badverf;
return SVC_DENIED;
}
/* Put NULL verifier */
svc_putnl(resv, RPC_AUTH_NULL);
svc_putnl(resv, 0);
rqstp->rq_cred.cr_flavor = RPC_AUTH_UNIX;
return SVC_OK;
badcred:
rqstp->rq_auth_stat = rpc_autherr_badcred;
return SVC_DENIED;
}
static int
svcauth_unix_release(struct svc_rqst *rqstp)
{
/* Verifier (such as it is) is already in place.
*/
if (rqstp->rq_client)
auth_domain_put(rqstp->rq_client);
rqstp->rq_client = NULL;
if (rqstp->rq_cred.cr_group_info)
put_group_info(rqstp->rq_cred.cr_group_info);
rqstp->rq_cred.cr_group_info = NULL;
return 0;
}
struct auth_ops svcauth_unix = {
.name = "unix",
.owner = THIS_MODULE,
.flavour = RPC_AUTH_UNIX,
.accept = svcauth_unix_accept,
.release = svcauth_unix_release,
.domain_release = svcauth_unix_domain_release,
.set_client = svcauth_unix_set_client,
};
static const struct cache_detail ip_map_cache_template = {
.owner = THIS_MODULE,
.hash_size = IP_HASHMAX,
.name = "auth.unix.ip",
.cache_put = ip_map_put,
.cache_upcall = ip_map_upcall,
.cache_request = ip_map_request,
.cache_parse = ip_map_parse,
.cache_show = ip_map_show,
.match = ip_map_match,
.init = ip_map_init,
.update = update,
.alloc = ip_map_alloc,
};
int ip_map_cache_create(struct net *net)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct cache_detail *cd;
int err;
cd = cache_create_net(&ip_map_cache_template, net);
if (IS_ERR(cd))
return PTR_ERR(cd);
err = cache_register_net(cd, net);
if (err) {
cache_destroy_net(cd, net);
return err;
}
sn->ip_map_cache = cd;
return 0;
}
void ip_map_cache_destroy(struct net *net)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct cache_detail *cd = sn->ip_map_cache;
sn->ip_map_cache = NULL;
cache_purge(cd);
cache_unregister_net(cd, net);
cache_destroy_net(cd, net);
}