7d267278a9
Rainer Weikusat <rweikusat@mobileactivedefense.com> writes:
An AF_UNIX datagram socket being the client in an n:1 association with
some server socket is only allowed to send messages to the server if the
receive queue of this socket contains at most sk_max_ack_backlog
datagrams. This implies that prospective writers might be forced to go
to sleep despite none of the message presently enqueued on the server
receive queue were sent by them. In order to ensure that these will be
woken up once space becomes again available, the present unix_dgram_poll
routine does a second sock_poll_wait call with the peer_wait wait queue
of the server socket as queue argument (unix_dgram_recvmsg does a wake
up on this queue after a datagram was received). This is inherently
problematic because the server socket is only guaranteed to remain alive
for as long as the client still holds a reference to it. In case the
connection is dissolved via connect or by the dead peer detection logic
in unix_dgram_sendmsg, the server socket may be freed despite "the
polling mechanism" (in particular, epoll) still has a pointer to the
corresponding peer_wait queue. There's no way to forcibly deregister a
wait queue with epoll.
Based on an idea by Jason Baron, the patch below changes the code such
that a wait_queue_t belonging to the client socket is enqueued on the
peer_wait queue of the server whenever the peer receive queue full
condition is detected by either a sendmsg or a poll. A wake up on the
peer queue is then relayed to the ordinary wait queue of the client
socket via wake function. The connection to the peer wait queue is again
dissolved if either a wake up is about to be relayed or the client
socket reconnects or a dead peer is detected or the client socket is
itself closed. This enables removing the second sock_poll_wait from
unix_dgram_poll, thus avoiding the use-after-free, while still ensuring
that no blocked writer sleeps forever.
Signed-off-by: Rainer Weikusat <rweikusat@mobileactivedefense.com>
Fixes: ec0d215f94
("af_unix: fix 'poll for write'/connected DGRAM sockets")
Reviewed-by: Jason Baron <jbaron@akamai.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
86 lines
2.1 KiB
C
86 lines
2.1 KiB
C
#ifndef __LINUX_NET_AFUNIX_H
|
|
#define __LINUX_NET_AFUNIX_H
|
|
|
|
#include <linux/socket.h>
|
|
#include <linux/un.h>
|
|
#include <linux/mutex.h>
|
|
#include <net/sock.h>
|
|
|
|
void unix_inflight(struct file *fp);
|
|
void unix_notinflight(struct file *fp);
|
|
void unix_gc(void);
|
|
void wait_for_unix_gc(void);
|
|
struct sock *unix_get_socket(struct file *filp);
|
|
struct sock *unix_peer_get(struct sock *);
|
|
|
|
#define UNIX_HASH_SIZE 256
|
|
#define UNIX_HASH_BITS 8
|
|
|
|
extern unsigned int unix_tot_inflight;
|
|
extern spinlock_t unix_table_lock;
|
|
extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
|
|
|
|
struct unix_address {
|
|
atomic_t refcnt;
|
|
int len;
|
|
unsigned int hash;
|
|
struct sockaddr_un name[0];
|
|
};
|
|
|
|
struct unix_skb_parms {
|
|
struct pid *pid; /* Skb credentials */
|
|
kuid_t uid;
|
|
kgid_t gid;
|
|
struct scm_fp_list *fp; /* Passed files */
|
|
#ifdef CONFIG_SECURITY_NETWORK
|
|
u32 secid; /* Security ID */
|
|
#endif
|
|
u32 consumed;
|
|
};
|
|
|
|
#define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb))
|
|
|
|
#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock)
|
|
#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock)
|
|
#define unix_state_lock_nested(s) \
|
|
spin_lock_nested(&unix_sk(s)->lock, \
|
|
SINGLE_DEPTH_NESTING)
|
|
|
|
/* The AF_UNIX socket */
|
|
struct unix_sock {
|
|
/* WARNING: sk has to be the first member */
|
|
struct sock sk;
|
|
struct unix_address *addr;
|
|
struct path path;
|
|
struct mutex readlock;
|
|
struct sock *peer;
|
|
struct list_head link;
|
|
atomic_long_t inflight;
|
|
spinlock_t lock;
|
|
unsigned char recursion_level;
|
|
unsigned long gc_flags;
|
|
#define UNIX_GC_CANDIDATE 0
|
|
#define UNIX_GC_MAYBE_CYCLE 1
|
|
struct socket_wq peer_wq;
|
|
wait_queue_t peer_wake;
|
|
};
|
|
|
|
static inline struct unix_sock *unix_sk(const struct sock *sk)
|
|
{
|
|
return (struct unix_sock *)sk;
|
|
}
|
|
|
|
#define peer_wait peer_wq.wait
|
|
|
|
long unix_inq_len(struct sock *sk);
|
|
long unix_outq_len(struct sock *sk);
|
|
|
|
#ifdef CONFIG_SYSCTL
|
|
int unix_sysctl_register(struct net *net);
|
|
void unix_sysctl_unregister(struct net *net);
|
|
#else
|
|
static inline int unix_sysctl_register(struct net *net) { return 0; }
|
|
static inline void unix_sysctl_unregister(struct net *net) {}
|
|
#endif
|
|
#endif
|