linux/include/net/af_unix.h
Eric Dumazet e370a72363 af_unix: improve STREAM behavior with fragmented memory
unix_stream_sendmsg() currently uses order-2 allocations,
and we had numerous reports this can fail.

The __GFP_REPEAT flag present in sock_alloc_send_pskb() is
not helping.

This patch extends the work done in commit eb6a24816b
("af_unix: reduce high order page allocations) for
datagram sockets.

This opens the possibility of zero copy IO (splice() and
friends)

The trick is to not use skb_pull() anymore in recvmsg() path,
and instead add a @consumed field in UNIXCB() to track amount
of already read payload in the skb.

There is a performance regression for large sends
because of extra page allocations that will be addressed
in a follow-up patch, allowing sock_alloc_send_pskb()
to attempt high order page allocations.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-08-10 01:16:44 -07:00

82 lines
2.0 KiB
C

#ifndef __LINUX_NET_AFUNIX_H
#define __LINUX_NET_AFUNIX_H
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/mutex.h>
#include <net/sock.h>
void unix_inflight(struct file *fp);
void unix_notinflight(struct file *fp);
void unix_gc(void);
void wait_for_unix_gc(void);
struct sock *unix_get_socket(struct file *filp);
struct sock *unix_peer_get(struct sock *);
#define UNIX_HASH_SIZE 256
#define UNIX_HASH_BITS 8
extern unsigned int unix_tot_inflight;
extern spinlock_t unix_table_lock;
extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
struct unix_address {
atomic_t refcnt;
int len;
unsigned int hash;
struct sockaddr_un name[0];
};
struct unix_skb_parms {
struct pid *pid; /* Skb credentials */
kuid_t uid;
kgid_t gid;
struct scm_fp_list *fp; /* Passed files */
#ifdef CONFIG_SECURITY_NETWORK
u32 secid; /* Security ID */
#endif
u32 consumed;
};
#define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb))
#define UNIXSID(skb) (&UNIXCB((skb)).secid)
#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock)
#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock)
#define unix_state_lock_nested(s) \
spin_lock_nested(&unix_sk(s)->lock, \
SINGLE_DEPTH_NESTING)
/* The AF_UNIX socket */
struct unix_sock {
/* WARNING: sk has to be the first member */
struct sock sk;
struct unix_address *addr;
struct path path;
struct mutex readlock;
struct sock *peer;
struct list_head link;
atomic_long_t inflight;
spinlock_t lock;
unsigned char recursion_level;
unsigned long gc_flags;
#define UNIX_GC_CANDIDATE 0
#define UNIX_GC_MAYBE_CYCLE 1
struct socket_wq peer_wq;
};
#define unix_sk(__sk) ((struct unix_sock *)__sk)
#define peer_wait peer_wq.wait
long unix_inq_len(struct sock *sk);
long unix_outq_len(struct sock *sk);
#ifdef CONFIG_SYSCTL
int unix_sysctl_register(struct net *net);
void unix_sysctl_unregister(struct net *net);
#else
static inline int unix_sysctl_register(struct net *net) { return 0; }
static inline void unix_sysctl_unregister(struct net *net) {}
#endif
#endif