forked from Minki/linux
af_unix: Fix splice-bind deadlock
On 2015/11/06, Dmitry Vyukov reported a deadlock involving the splice system call and AF_UNIX sockets, http://lists.openwall.net/netdev/2015/11/06/24 The situation was analyzed as (a while ago) A: socketpair() B: splice() from a pipe to /mnt/regular_file does sb_start_write() on /mnt C: try to freeze /mnt wait for B to finish with /mnt A: bind() try to bind our socket to /mnt/new_socket_name lock our socket, see it not bound yet decide that it needs to create something in /mnt try to do sb_start_write() on /mnt, block (it's waiting for C). D: splice() from the same pipe to our socket lock the pipe, see that socket is connected try to lock the socket, block waiting for A B: get around to actually feeding a chunk from pipe to file, try to lock the pipe. Deadlock. on 2015/11/10 by Al Viro, http://lists.openwall.net/netdev/2015/11/10/4 The patch fixes this by removing the kern_path_create related code from unix_mknod and executing it as part of unix_bind prior acquiring the readlock of the socket in question. This means that A (as used above) will sb_start_write on /mnt before it acquires the readlock, hence, it won't indirectly block B which first did a sb_start_write and then waited for a thread trying to acquire the readlock. Consequently, A being blocked by C waiting for B won't cause a deadlock anymore (effectively, both A and B acquire two locks in opposite order in the situation described above). Dmitry Vyukov(<dvyukov@google.com>) tested the original patch. Signed-off-by: Rainer Weikusat <rweikusat@mobileactivedefense.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
b5bdacf3bb
commit
c845acb324
@ -953,32 +953,20 @@ fail:
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
|
static int unix_mknod(struct dentry *dentry, struct path *path, umode_t mode,
|
||||||
|
struct path *res)
|
||||||
{
|
{
|
||||||
struct dentry *dentry;
|
int err;
|
||||||
struct path path;
|
|
||||||
int err = 0;
|
|
||||||
/*
|
|
||||||
* Get the parent directory, calculate the hash for last
|
|
||||||
* component.
|
|
||||||
*/
|
|
||||||
dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
|
|
||||||
err = PTR_ERR(dentry);
|
|
||||||
if (IS_ERR(dentry))
|
|
||||||
return err;
|
|
||||||
|
|
||||||
/*
|
err = security_path_mknod(path, dentry, mode, 0);
|
||||||
* All right, let's create it.
|
|
||||||
*/
|
|
||||||
err = security_path_mknod(&path, dentry, mode, 0);
|
|
||||||
if (!err) {
|
if (!err) {
|
||||||
err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
|
err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0);
|
||||||
if (!err) {
|
if (!err) {
|
||||||
res->mnt = mntget(path.mnt);
|
res->mnt = mntget(path->mnt);
|
||||||
res->dentry = dget(dentry);
|
res->dentry = dget(dentry);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
done_path_create(&path, dentry);
|
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -989,10 +977,12 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
|
|||||||
struct unix_sock *u = unix_sk(sk);
|
struct unix_sock *u = unix_sk(sk);
|
||||||
struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
|
struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
|
||||||
char *sun_path = sunaddr->sun_path;
|
char *sun_path = sunaddr->sun_path;
|
||||||
int err;
|
int err, name_err;
|
||||||
unsigned int hash;
|
unsigned int hash;
|
||||||
struct unix_address *addr;
|
struct unix_address *addr;
|
||||||
struct hlist_head *list;
|
struct hlist_head *list;
|
||||||
|
struct path path;
|
||||||
|
struct dentry *dentry;
|
||||||
|
|
||||||
err = -EINVAL;
|
err = -EINVAL;
|
||||||
if (sunaddr->sun_family != AF_UNIX)
|
if (sunaddr->sun_family != AF_UNIX)
|
||||||
@ -1008,14 +998,34 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
|
|||||||
goto out;
|
goto out;
|
||||||
addr_len = err;
|
addr_len = err;
|
||||||
|
|
||||||
|
name_err = 0;
|
||||||
|
dentry = NULL;
|
||||||
|
if (sun_path[0]) {
|
||||||
|
/* Get the parent directory, calculate the hash for last
|
||||||
|
* component.
|
||||||
|
*/
|
||||||
|
dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
|
||||||
|
|
||||||
|
if (IS_ERR(dentry)) {
|
||||||
|
/* delay report until after 'already bound' check */
|
||||||
|
name_err = PTR_ERR(dentry);
|
||||||
|
dentry = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
err = mutex_lock_interruptible(&u->readlock);
|
err = mutex_lock_interruptible(&u->readlock);
|
||||||
if (err)
|
if (err)
|
||||||
goto out;
|
goto out_path;
|
||||||
|
|
||||||
err = -EINVAL;
|
err = -EINVAL;
|
||||||
if (u->addr)
|
if (u->addr)
|
||||||
goto out_up;
|
goto out_up;
|
||||||
|
|
||||||
|
if (name_err) {
|
||||||
|
err = name_err == -EEXIST ? -EADDRINUSE : name_err;
|
||||||
|
goto out_up;
|
||||||
|
}
|
||||||
|
|
||||||
err = -ENOMEM;
|
err = -ENOMEM;
|
||||||
addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
|
addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
|
||||||
if (!addr)
|
if (!addr)
|
||||||
@ -1026,11 +1036,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
|
|||||||
addr->hash = hash ^ sk->sk_type;
|
addr->hash = hash ^ sk->sk_type;
|
||||||
atomic_set(&addr->refcnt, 1);
|
atomic_set(&addr->refcnt, 1);
|
||||||
|
|
||||||
if (sun_path[0]) {
|
if (dentry) {
|
||||||
struct path path;
|
struct path u_path;
|
||||||
umode_t mode = S_IFSOCK |
|
umode_t mode = S_IFSOCK |
|
||||||
(SOCK_INODE(sock)->i_mode & ~current_umask());
|
(SOCK_INODE(sock)->i_mode & ~current_umask());
|
||||||
err = unix_mknod(sun_path, mode, &path);
|
err = unix_mknod(dentry, &path, mode, &u_path);
|
||||||
if (err) {
|
if (err) {
|
||||||
if (err == -EEXIST)
|
if (err == -EEXIST)
|
||||||
err = -EADDRINUSE;
|
err = -EADDRINUSE;
|
||||||
@ -1038,9 +1048,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
|
|||||||
goto out_up;
|
goto out_up;
|
||||||
}
|
}
|
||||||
addr->hash = UNIX_HASH_SIZE;
|
addr->hash = UNIX_HASH_SIZE;
|
||||||
hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1);
|
hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
|
||||||
spin_lock(&unix_table_lock);
|
spin_lock(&unix_table_lock);
|
||||||
u->path = path;
|
u->path = u_path;
|
||||||
list = &unix_socket_table[hash];
|
list = &unix_socket_table[hash];
|
||||||
} else {
|
} else {
|
||||||
spin_lock(&unix_table_lock);
|
spin_lock(&unix_table_lock);
|
||||||
@ -1063,6 +1073,10 @@ out_unlock:
|
|||||||
spin_unlock(&unix_table_lock);
|
spin_unlock(&unix_table_lock);
|
||||||
out_up:
|
out_up:
|
||||||
mutex_unlock(&u->readlock);
|
mutex_unlock(&u->readlock);
|
||||||
|
out_path:
|
||||||
|
if (dentry)
|
||||||
|
done_path_create(&path, dentry);
|
||||||
|
|
||||||
out:
|
out:
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user