Mirror of https://github.com/torvalds/linux.git (synced 2024-11-21 19:41:42 +00:00)
Merge branch 'vfs.file'
Bring in the changes to the file infrastructure for this cycle.
Mostly cleanups and some performance tweaks.

* file: remove __receive_fd()
* file: stop exposing receive_fd_user()
* fs: replace f_rcuhead with f_task_work
* file: remove pointless wrapper
* file: s/close_fd_get_file()/file_close_fd()/g
* Improve __fget_files_rcu() code generation (and thus __fget_light())
* file: massage cleanup of files that failed to open

Signed-off-by: Christian Brauner <brauner@kernel.org>
commit 2137e15642
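At a glance, the caller-visible API changes in this branch are the receive_fd() signature (it now takes the optional __user pointer directly, replacing __receive_fd() and the old two-argument receive_fd()) and the rename of close_fd_get_file() to file_close_fd(). The following is a minimal caller-side sketch using only the signatures that appear in the diff below; the example_* helpers are hypothetical and not part of this series:

#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/fdtable.h>

/* Hypothetical helper: install an already-opened file as a new fd. */
static int example_install_fd(struct file *filp)
{
	/* Pass NULL when no fd number has to be copied out to userspace. */
	return receive_fd(filp, NULL, O_CLOEXEC);
}

/* Hypothetical helper: detach an fd from the table for deferred cleanup. */
static struct file *example_detach_fd(unsigned int fd)
{
	/* Formerly close_fd_get_file(); the caller must still call filp_close(). */
	return file_close_fd(fd);
}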
@@ -1921,7 +1921,7 @@ static void binder_deferred_fd_close(int fd)
 	if (!twcb)
 		return;
 	init_task_work(&twcb->twork, binder_do_fd_close);
-	twcb->file = close_fd_get_file(fd);
+	twcb->file = file_close_fd(fd);
 	if (twcb->file) {
 		// pin it until binder_do_fd_close(); see comments there
 		get_file(twcb->file);
@@ -1157,7 +1157,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
 			fput(f);
 			break;
 		}
-		ret = receive_fd(f, perm_to_file_flags(entry.perm));
+		ret = receive_fd(f, NULL, perm_to_file_flags(entry.perm));
 		fput(f);
 		break;
 	}
fs/file.c
@@ -629,19 +629,23 @@ void fd_install(unsigned int fd, struct file *file)
 EXPORT_SYMBOL(fd_install);
 
 /**
- * pick_file - return file associatd with fd
+ * file_close_fd_locked - return file associated with fd
  * @files: file struct to retrieve file from
  * @fd: file descriptor to retrieve file for
  *
+ * Doesn't take a separate reference count.
+ *
  * Context: files_lock must be held.
  *
  * Returns: The file associated with @fd (NULL if @fd is not open)
  */
-static struct file *pick_file(struct files_struct *files, unsigned fd)
+struct file *file_close_fd_locked(struct files_struct *files, unsigned fd)
 {
 	struct fdtable *fdt = files_fdtable(files);
 	struct file *file;
 
+	lockdep_assert_held(&files->file_lock);
+
 	if (fd >= fdt->max_fds)
 		return NULL;
@@ -660,7 +664,7 @@ int close_fd(unsigned fd)
 	struct file *file;
 
 	spin_lock(&files->file_lock);
-	file = pick_file(files, fd);
+	file = file_close_fd_locked(files, fd);
 	spin_unlock(&files->file_lock);
 	if (!file)
 		return -EBADF;
@@ -707,7 +711,7 @@ static inline void __range_close(struct files_struct *files, unsigned int fd,
 	max_fd = min(max_fd, n);
 
 	for (; fd <= max_fd; fd++) {
-		file = pick_file(files, fd);
+		file = file_close_fd_locked(files, fd);
 		if (file) {
 			spin_unlock(&files->file_lock);
 			filp_close(file, files);
@@ -795,26 +799,21 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
 	return 0;
 }
 
-/*
- * See close_fd_get_file() below, this variant assumes current->files->file_lock
- * is held.
- */
-struct file *__close_fd_get_file(unsigned int fd)
-{
-	return pick_file(current->files, fd);
-}
-
-/*
- * variant of close_fd that gets a ref on the file for later fput.
- * The caller must ensure that filp_close() called on the file.
+/**
+ * file_close_fd - return file associated with fd
+ * @fd: file descriptor to retrieve file for
+ *
+ * Doesn't take a separate reference count.
+ *
+ * Returns: The file associated with @fd (NULL if @fd is not open)
  */
-struct file *close_fd_get_file(unsigned int fd)
+struct file *file_close_fd(unsigned int fd)
 {
 	struct files_struct *files = current->files;
 	struct file *file;
 
 	spin_lock(&files->file_lock);
-	file = pick_file(files, fd);
+	file = file_close_fd_locked(files, fd);
 	spin_unlock(&files->file_lock);
 
 	return file;
@@ -959,31 +958,45 @@ static inline struct file *__fget_files_rcu(struct files_struct *files,
 		struct file *file;
 		struct fdtable *fdt = rcu_dereference_raw(files->fdt);
 		struct file __rcu **fdentry;
+		unsigned long nospec_mask;
 
-		if (unlikely(fd >= fdt->max_fds))
-			return NULL;
-
-		fdentry = fdt->fd + array_index_nospec(fd, fdt->max_fds);
+		/* Mask is a 0 for invalid fd's, ~0 for valid ones */
+		nospec_mask = array_index_mask_nospec(fd, fdt->max_fds);
 
 		/*
-		 * Ok, we have a file pointer. However, because we do
-		 * this all locklessly under RCU, we may be racing with
-		 * that file being closed.
+		 * fdentry points to the 'fd' offset, or fdt->fd[0].
+		 * Loading from fdt->fd[0] is always safe, because the
+		 * array always exists.
+		 */
+		fdentry = fdt->fd + (fd & nospec_mask);
+
+		/* Do the load, then mask any invalid result */
+		file = rcu_dereference_raw(*fdentry);
+		file = (void *)(nospec_mask & (unsigned long)file);
+		if (unlikely(!file))
+			return NULL;
+
+		/*
+		 * Ok, we have a file pointer that was valid at
+		 * some point, but it might have become stale since.
 		 *
+		 * We need to confirm it by incrementing the refcount
+		 * and then check the lookup again.
+		 *
+		 * atomic_long_inc_not_zero() gives us a full memory
+		 * barrier. We only really need an 'acquire' one to
+		 * protect the loads below, but we don't have that.
+		 */
+		if (unlikely(!atomic_long_inc_not_zero(&file->f_count)))
+			continue;
+
+		/*
 		 * Such a race can take two forms:
 		 *
 		 *  (a) the file ref already went down to zero and the
 		 *      file hasn't been reused yet or the file count
 		 *      isn't zero but the file has already been reused.
-		 */
-		file = __get_file_rcu(fdentry);
-		if (unlikely(!file))
-			return NULL;
-
-		if (unlikely(IS_ERR(file)))
-			continue;
-
-		/*
+		 *
 		 *  (b) the file table entry has changed under us.
 		 *       Note that we don't need to re-check the 'fdt->fd'
 		 *       pointer having changed, because it always goes
@@ -991,7 +1004,8 @@ static inline struct file *__fget_files_rcu(struct files_struct *files,
 		 *
 		 * If so, we need to put our ref and try again.
 		 */
-		if (unlikely(rcu_dereference_raw(files->fdt) != fdt)) {
+		if (unlikely(file != rcu_dereference_raw(*fdentry)) ||
+		    unlikely(rcu_dereference_raw(files->fdt) != fdt)) {
 			fput(file);
 			continue;
 		}
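The two hunks above boil down to a lookup, pin, re-check loop: load the pointer locklessly, pin it with a conditional refcount increment, then re-check the slot (and the table pointer) to confirm the pinned object is still the one published there. Below is a condensed sketch of that pattern with made-up obj/obj_put() names; the caller is assumed to hold rcu_read_lock(), and the object's slab is assumed to be SLAB_TYPESAFE_BY_RCU, as is the case for struct file, so dereferencing a concurrently freed entry stays safe:

#include <linux/atomic.h>
#include <linux/rcupdate.h>

struct obj {
	atomic_long_t refcnt;
};

static void obj_put(struct obj *o);	/* hypothetical release helper */

static struct obj *obj_get_rcu(struct obj __rcu **slot)
{
	struct obj *o;

	for (;;) {
		o = rcu_dereference_raw(*slot);
		if (!o)
			return NULL;
		/* A zero refcount means the object is already being torn down. */
		if (unlikely(!atomic_long_inc_not_zero(&o->refcnt)))
			continue;
		/* The slot may have been repopulated while we took the ref. */
		if (likely(o == rcu_dereference_raw(*slot)))
			return o;
		obj_put(o);	/* lost the race: drop our ref and retry */
	}
}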
@@ -1128,13 +1142,13 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask)
 	 * atomic_read_acquire() pairs with atomic_dec_and_test() in
 	 * put_files_struct().
 	 */
-	if (atomic_read_acquire(&files->count) == 1) {
+	if (likely(atomic_read_acquire(&files->count) == 1)) {
 		file = files_lookup_fd_raw(files, fd);
 		if (!file || unlikely(file->f_mode & mask))
 			return 0;
 		return (unsigned long)file;
 	} else {
-		file = __fget(fd, mask);
+		file = __fget_files(files, fd, mask);
 		if (!file)
 			return 0;
 		return FDPUT_FPUT | (unsigned long)file;
@@ -1282,7 +1296,7 @@ out_unlock:
 }
 
 /**
- * __receive_fd() - Install received file into file descriptor table
+ * receive_fd() - Install received file into file descriptor table
  * @file: struct file that was received from another process
  * @ufd: __user pointer to write new fd number to
  * @o_flags: the O_* flags to apply to the new fd entry
@@ -1296,7 +1310,7 @@ out_unlock:
  *
  * Returns newly install fd or -ve on error.
  */
-int __receive_fd(struct file *file, int __user *ufd, unsigned int o_flags)
+int receive_fd(struct file *file, int __user *ufd, unsigned int o_flags)
 {
 	int new_fd;
 	int error;
@@ -1321,6 +1335,7 @@ int __receive_fd(struct file *file, int __user *ufd, unsigned int o_flags)
 	__receive_sock(file);
 	return new_fd;
 }
+EXPORT_SYMBOL_GPL(receive_fd);
 
 int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags)
 {
@@ -1336,12 +1351,6 @@ int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags)
 	return new_fd;
 }
 
-int receive_fd(struct file *file, unsigned int o_flags)
-{
-	return __receive_fd(file, NULL, o_flags);
-}
-EXPORT_SYMBOL_GPL(receive_fd);
-
 static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
 {
 	int err = -EBADF;
@@ -75,18 +75,6 @@ static inline void file_free(struct file *f)
 	}
 }
 
-void release_empty_file(struct file *f)
-{
-	WARN_ON_ONCE(f->f_mode & (FMODE_BACKING | FMODE_OPENED));
-	if (atomic_long_dec_and_test(&f->f_count)) {
-		security_file_free(f);
-		put_cred(f->f_cred);
-		if (likely(!(f->f_mode & FMODE_NOACCOUNT)))
-			percpu_counter_dec(&nr_files);
-		kmem_cache_free(filp_cachep, f);
-	}
-}
-
 /*
  * Return the total number of open files in the system
  */
@@ -419,7 +407,7 @@ static void delayed_fput(struct work_struct *unused)
 
 static void ____fput(struct callback_head *work)
 {
-	__fput(container_of(work, struct file, f_rcuhead));
+	__fput(container_of(work, struct file, f_task_work));
 }
 
 /*
@@ -445,9 +433,13 @@ void fput(struct file *file)
 	if (atomic_long_dec_and_test(&file->f_count)) {
 		struct task_struct *task = current;
 
+		if (unlikely(!(file->f_mode & (FMODE_BACKING | FMODE_OPENED)))) {
+			file_free(file);
+			return;
+		}
 		if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
-			init_task_work(&file->f_rcuhead, ____fput);
-			if (!task_work_add(task, &file->f_rcuhead, TWA_RESUME))
+			init_task_work(&file->f_task_work, ____fput);
+			if (!task_work_add(task, &file->f_task_work, TWA_RESUME))
 				return;
 			/*
 			 * After this task has run exit_task_work(),
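The f_rcuhead to f_task_work rename matches how fput() actually defers the final __fput(): it embeds a struct callback_head in the object and queues it with task_work_add() so the cleanup runs when the owning task returns to userspace. A small sketch of that pattern, with a hypothetical deferred_release type standing in for struct file:

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/task_work.h>

struct deferred_release {
	struct callback_head twork;
	void *payload;
};

static void deferred_release_fn(struct callback_head *work)
{
	struct deferred_release *dr =
		container_of(work, struct deferred_release, twork);

	/* Runs in the queuing task's context, on its way back to userspace. */
	kfree(dr->payload);
	kfree(dr);
}

/* Queue @dr for cleanup when @task next returns to userspace. */
static int example_queue_release(struct task_struct *task,
				 struct deferred_release *dr)
{
	init_task_work(&dr->twork, deferred_release_fn);
	return task_work_add(task, &dr->twork, TWA_RESUME);
}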
@@ -94,7 +94,6 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
 struct file *alloc_empty_file(int flags, const struct cred *cred);
 struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred);
 struct file *alloc_empty_backing_file(int flags, const struct cred *cred);
-void release_empty_file(struct file *f);
 
 static inline void file_put_write_access(struct file *file)
 {
@@ -180,7 +179,7 @@ extern struct file *do_file_open_root(const struct path *,
 		const char *, const struct open_flags *);
 extern struct open_how build_open_how(int flags, umode_t mode);
 extern int build_open_flags(const struct open_how *how, struct open_flags *op);
-extern struct file *__close_fd_get_file(unsigned int fd);
+struct file *file_close_fd_locked(struct files_struct *files, unsigned fd);
 
 long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
 int chmod_common(const struct path *path, umode_t mode);
@@ -3785,10 +3785,7 @@ static struct file *path_openat(struct nameidata *nd,
 		WARN_ON(1);
 		error = -EINVAL;
 	}
-	if (unlikely(file->f_mode & FMODE_OPENED))
-		fput(file);
-	else
-		release_empty_file(file);
+	fput(file);
 	if (error == -EOPENSTALE) {
 		if (flags & LOOKUP_RCU)
 			error = -ECHILD;
@@ -1578,7 +1578,7 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
 	int retval;
 	struct file *file;
 
-	file = close_fd_get_file(fd);
+	file = file_close_fd(fd);
 	if (!file)
 		return -EBADF;
 
@@ -83,12 +83,17 @@ struct dentry;
 static inline struct file *files_lookup_fd_raw(struct files_struct *files, unsigned int fd)
 {
 	struct fdtable *fdt = rcu_dereference_raw(files->fdt);
+	unsigned long mask = array_index_mask_nospec(fd, fdt->max_fds);
+	struct file *needs_masking;
 
-	if (fd < fdt->max_fds) {
-		fd = array_index_nospec(fd, fdt->max_fds);
-		return rcu_dereference_raw(fdt->fd[fd]);
-	}
-	return NULL;
+	/*
+	 * 'mask' is zero for an out-of-bounds fd, all ones for ok.
+	 * 'fd&mask' is 'fd' for ok, or 0 for out of bounds.
+	 *
+	 * Accessing fdt->fd[0] is ok, but needs masking of the result.
+	 */
+	needs_masking = rcu_dereference_raw(fdt->fd[fd&mask]);
+	return (struct file *)(mask & (unsigned long)needs_masking);
 }
 
 static inline struct file *files_lookup_fd_locked(struct files_struct *files, unsigned int fd)
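The comment above spells out the trick: array_index_mask_nospec() yields an all-ones mask for an in-bounds fd and zero otherwise, so both the index and the loaded pointer can be masked without a conditional branch that speculation could skip. Below is a self-contained userspace rendition of the same idea; index_mask() mirrors the generic array_index_mask_nospec() fallback, and the table and values are made up for the demo:

#include <stdio.h>
#include <stdint.h>

/* Returns ~0UL when idx < size and 0 otherwise, without a branch. */
static unsigned long index_mask(unsigned long idx, unsigned long size)
{
	return ~(long)(idx | (size - 1UL - idx)) >> (sizeof(long) * 8 - 1);
}

int main(void)
{
	const char *table[4] = { "zero", "one", "two", "three" };
	unsigned long idx[] = { 2, 7 };	/* one in-bounds, one out-of-bounds */

	for (int i = 0; i < 2; i++) {
		unsigned long mask = index_mask(idx[i], 4);
		/* Clamp the index to 0 and mask the loaded pointer, as the
		 * fdtable code does: an out-of-bounds lookup yields NULL. */
		const char *p = table[idx[i] & mask];

		p = (const char *)(mask & (uintptr_t)p);
		printf("idx=%lu -> %s\n", idx[i], p ? p : "(null)");
	}
	return 0;
}

Running this prints "two" for index 2 and "(null)" for index 7: the out-of-range lookup is turned into a harmless load of slot 0 whose result is then masked away.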
@@ -114,7 +119,7 @@ int iterate_fd(struct files_struct *, unsigned,
 
 extern int close_fd(unsigned int fd);
 extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags);
-extern struct file *close_fd_get_file(unsigned int fd);
+extern struct file *file_close_fd(unsigned int fd);
 extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
 		struct files_struct **new_fdp);
 
@@ -96,18 +96,8 @@ DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T),
 
 extern void fd_install(unsigned int fd, struct file *file);
 
-extern int __receive_fd(struct file *file, int __user *ufd,
-			unsigned int o_flags);
+int receive_fd(struct file *file, int __user *ufd, unsigned int o_flags);
 
-extern int receive_fd(struct file *file, unsigned int o_flags);
-
-static inline int receive_fd_user(struct file *file, int __user *ufd,
-				  unsigned int o_flags)
-{
-	if (ufd == NULL)
-		return -EFAULT;
-	return __receive_fd(file, ufd, o_flags);
-}
 int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags);
 
 extern void flush_delayed_fput(void);
@@ -991,8 +991,10 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
  */
 struct file {
 	union {
+		/* fput() uses task work when closing and freeing file (default). */
+		struct callback_head 	f_task_work;
+		/* fput() must use workqueue (most kernel threads). */
 		struct llist_node	f_llist;
-		struct rcu_head 	f_rcuhead;
 		unsigned int 		f_iocb_flags;
 	};
@@ -5,6 +5,7 @@
 #include <linux/limits.h>
 #include <linux/net.h>
 #include <linux/cred.h>
+#include <linux/file.h>
 #include <linux/security.h>
 #include <linux/pid.h>
 #include <linux/nsproxy.h>
@@ -208,5 +209,13 @@ static inline void scm_recv_unix(struct socket *sock, struct msghdr *msg,
 	scm_destroy_cred(scm);
 }
 
+static inline int scm_recv_one_fd(struct file *f, int __user *ufd,
+				  unsigned int flags)
+{
+	if (!ufd)
+		return -EFAULT;
+	return receive_fd(f, ufd, flags);
+}
+
 #endif /* __LINUX_NET_SCM_H */
@@ -241,7 +241,7 @@ int io_close(struct io_kiocb *req, unsigned int issue_flags)
 		return -EAGAIN;
 	}
 
-	file = __close_fd_get_file(close->fd);
+	file = file_close_fd_locked(files, close->fd);
 	spin_unlock(&files->file_lock);
 	if (!file)
 		goto err;
@@ -700,7 +700,7 @@ static int pidfd_getfd(struct pid *pid, int fd)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	ret = receive_fd(file, O_CLOEXEC);
+	ret = receive_fd(file, NULL, O_CLOEXEC);
 	fput(file);
 
 	return ret;
@@ -1072,7 +1072,7 @@ static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd, struct seccomp_kn
 	 */
 	list_del_init(&addfd->list);
 	if (!addfd->setfd)
-		fd = receive_fd(addfd->file, addfd->flags);
+		fd = receive_fd(addfd->file, NULL, addfd->flags);
 	else
 		fd = receive_fd_replace(addfd->fd, addfd->file, addfd->flags);
 	addfd->ret = fd;
@@ -297,7 +297,7 @@ void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm)
 	int err = 0, i;
 
 	for (i = 0; i < fdmax; i++) {
-		err = receive_fd_user(scm->fp->fp[i], cmsg_data + i, o_flags);
+		err = scm_recv_one_fd(scm->fp->fp[i], cmsg_data + i, o_flags);
 		if (err < 0)
 			break;
 	}
@@ -319,7 +319,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
 	}
 
 	for (i = 0; i < fdmax; i++) {
-		err = receive_fd_user(scm->fp->fp[i], cmsg_data + i, o_flags);
+		err = scm_recv_one_fd(scm->fp->fp[i], cmsg_data + i, o_flags);
 		if (err < 0)
 			break;
 	}