libfs: improve path_from_stashed() helper

In earlier patches we moved both nsfs and pidfs to path_from_stashed().
The helper currently tries to add and stash a new dentry if a reusable
dentry couldn't be found and returns EAGAIN if it lost the race to stash
the dentry. The caller can use EAGAIN to retry.

The helper and the two filesystems can be written in a way that makes
returning EAGAIN unnecessary. To do this we need to change the
dentry->d_prune() implementation of nsfs and pidfs to not simply replace
the stashed dentry with NULL but to use a cmpxchg() and only replace
their own dentry.

path_from_stashed() can then be changed to not just stash a new
dentry when no dentry is currently stashed but also when an already dead
dentry is stashed. If another task managed to install a dentry in the
meantime it can simply be reused. Pack that into a loop and call it a
day.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/CAHk-=wgtLF5Z5=15-LKAczWm=-tUjHO+Bpf7WjBG+UU3s=fEQw@mail.gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
Christian Brauner 2024-02-18 14:52:24 +01:00
parent b28ddcc32d
commit 159a0d9fd5
No known key found for this signature in database
GPG Key ID: 91C61BC06578DCA2
3 changed files with 74 additions and 67 deletions

View File

@ -1988,11 +1988,11 @@ static inline struct dentry *get_stashed_dentry(struct dentry *stashed)
return dentry;
}
static struct dentry *stash_dentry(struct dentry **stashed, unsigned long ino,
struct super_block *sb,
const struct file_operations *fops,
const struct inode_operations *iops,
void *data)
static struct dentry *prepare_anon_dentry(unsigned long ino,
struct super_block *sb,
const struct file_operations *fops,
const struct inode_operations *iops,
void *data)
{
struct dentry *dentry;
struct inode *inode;
@ -2021,17 +2021,31 @@ static struct dentry *stash_dentry(struct dentry **stashed, unsigned long ino,
/* @data is now owned by the fs */
d_instantiate(dentry, inode);
if (cmpxchg(stashed, NULL, dentry)) {
d_delete(dentry); /* make sure ->d_prune() does nothing */
dput(dentry);
cpu_relax();
return ERR_PTR(-EAGAIN);
}
return dentry;
}
/*
 * stash_dentry - store @dentry in @stashed, or reuse a live dentry found there
 * @stashed: slot that holds the currently stashed dentry (NULL when empty)
 * @dentry:  new dentry the caller would like to install
 *
 * Return: @dentry if it was written into @stashed. Otherwise the dentry that
 * was already stashed, after lockref_get_not_dead() succeeded on it; in that
 * case @dentry was not stored and is left untouched by this function.
 */
static struct dentry *stash_dentry(struct dentry **stashed,
struct dentry *dentry)
{
/*
 * NOTE(review): the RCU guard presumably keeps @old safe to dereference
 * for the lockref check below - confirm against the dentry freeing path.
 */
guard(rcu)();
for (;;) {
struct dentry *old;
/* Assume any old dentry was cleared out. */
old = cmpxchg(stashed, NULL, dentry);
if (likely(!old))
return dentry;
/* Check if somebody else installed a reusable dentry. */
if (lockref_get_not_dead(&old->d_lockref))
return old;
/* There's an old dead dentry there, try to take it over. */
if (likely(try_cmpxchg(stashed, &old, dentry)))
return dentry;
/* The slot no longer held @old: start over from the top. */
}
}
/**
* path_from_stashed - create path from stashed or new dentry
* @stashed: where to retrieve or stash dentry
@ -2044,15 +2058,14 @@ static struct dentry *stash_dentry(struct dentry **stashed, unsigned long ino,
*
* The function tries to retrieve a stashed dentry from @stashed. If the dentry
* is still valid then it will be reused. If the dentry isn't usable the function
* will allocate a new dentry and inode. It will then try to update @stashed
* with the newly added dentry. If it fails -EAGAIN is returned and the caller
* my retry.
* will allocate a new dentry and inode. It will then check again whether it
* can reuse an existing dentry in case one has been added in the meantime or
* update @stashed with the newly added dentry.
*
* Special-purpose helper for nsfs and pidfs.
*
* Return: If 0 or an error is returned the caller can be sure that @data must
* be cleaned up. If 1 or -EAGAIN is returned @data is owned by the
* filesystem.
* be cleaned up. If 1 is returned @data is owned by the filesystem.
*/
int path_from_stashed(struct dentry **stashed, unsigned long ino,
struct vfsmount *mnt, const struct file_operations *fops,
@ -2062,17 +2075,23 @@ int path_from_stashed(struct dentry **stashed, unsigned long ino,
struct dentry *dentry;
int ret = 0;
dentry = get_stashed_dentry(*stashed);
if (dentry)
/* See if dentry can be reused. */
path->dentry = get_stashed_dentry(*stashed);
if (path->dentry)
goto out_path;
dentry = stash_dentry(stashed, ino, mnt->mnt_sb, fops, iops, data);
/* Allocate a new dentry. */
dentry = prepare_anon_dentry(ino, mnt->mnt_sb, fops, iops, data);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
/* Added a new dentry. @data is now owned by the filesystem. */
path->dentry = stash_dentry(stashed, dentry);
if (path->dentry != dentry)
dput(dentry);
ret = 1;
out_path:
path->dentry = dentry;
path->mnt = mntget(mnt);
return ret;
}

View File

@ -36,10 +36,12 @@ static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
static void ns_prune_dentry(struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
struct inode *inode;
inode = d_inode(dentry);
if (inode) {
struct ns_common *ns = inode->i_private;
WRITE_ONCE(ns->stashed, NULL);
cmpxchg(&ns->stashed, dentry, NULL);
}
}
@ -61,20 +63,17 @@ int ns_get_path_cb(struct path *path, ns_get_path_helper_t *ns_get_cb,
void *private_data)
{
int ret;
struct ns_common *ns;
do {
struct ns_common *ns = ns_get_cb(private_data);
if (!ns)
return -ENOENT;
ret = path_from_stashed(&ns->stashed, ns->inum, nsfs_mnt,
&ns_file_operations, NULL, ns, path);
if (ret <= 0 && ret != -EAGAIN)
ns->ops->put(ns);
} while (ret == -EAGAIN);
ns = ns_get_cb(private_data);
if (!ns)
return -ENOENT;
ret = path_from_stashed(&ns->stashed, ns->inum, nsfs_mnt,
&ns_file_operations, NULL, ns, path);
if (ret <= 0)
ns->ops->put(ns);
if (ret < 0)
return ret;
return 0;
}
@ -105,6 +104,7 @@ int open_related_ns(struct ns_common *ns,
struct ns_common *(*get_ns)(struct ns_common *ns))
{
struct path path = {};
struct ns_common *relative;
struct file *f;
int err;
int fd;
@ -113,22 +113,16 @@ int open_related_ns(struct ns_common *ns,
if (fd < 0)
return fd;
do {
struct ns_common *relative;
relative = get_ns(ns);
if (IS_ERR(relative)) {
put_unused_fd(fd);
return PTR_ERR(relative);
}
err = path_from_stashed(&relative->stashed, relative->inum,
nsfs_mnt, &ns_file_operations, NULL,
relative, &path);
if (err <= 0 && err != -EAGAIN)
relative->ops->put(relative);
} while (err == -EAGAIN);
relative = get_ns(ns);
if (IS_ERR(relative)) {
put_unused_fd(fd);
return PTR_ERR(relative);
}
err = path_from_stashed(&relative->stashed, relative->inum, nsfs_mnt,
&ns_file_operations, NULL, relative, &path);
if (err <= 0)
relative->ops->put(relative);
if (err < 0) {
put_unused_fd(fd);
return err;

View File

@ -140,7 +140,6 @@ struct pid *pidfd_pid(const struct file *file)
#ifdef CONFIG_FS_PID
static struct vfsmount *pidfs_mnt __ro_after_init;
static struct super_block *pidfs_sb __ro_after_init;
/*
* The vfs falls back to simple_setattr() if i_op->setattr() isn't
@ -195,7 +194,7 @@ static void pidfs_prune_dentry(struct dentry *dentry)
inode = d_inode(dentry);
if (inode) {
struct pid *pid = inode->i_private;
WRITE_ONCE(pid->stashed, NULL);
cmpxchg(&pid->stashed, dentry, NULL);
}
}
@ -231,19 +230,16 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags)
struct path path;
int ret;
do {
/*
* Inode numbering for pidfs start at RESERVED_PIDS + 1.
* This avoids collisions with the root inode which is 1
* for pseudo filesystems.
*/
ret = path_from_stashed(&pid->stashed, pid->ino, pidfs_mnt,
&pidfs_file_operations,
&pidfs_inode_operations, get_pid(pid),
&path);
if (ret <= 0 && ret != -EAGAIN)
put_pid(pid);
} while (ret == -EAGAIN);
/*
* Inode numbering for pidfs start at RESERVED_PIDS + 1.
* This avoids collisions with the root inode which is 1
* for pseudo filesystems.
*/
ret = path_from_stashed(&pid->stashed, pid->ino, pidfs_mnt,
&pidfs_file_operations, &pidfs_inode_operations,
get_pid(pid), &path);
if (ret <= 0)
put_pid(pid);
if (ret < 0)
return ERR_PTR(ret);
@ -257,8 +253,6 @@ void __init pidfs_init(void)
pidfs_mnt = kern_mount(&pidfs_type);
if (IS_ERR(pidfs_mnt))
panic("Failed to mount pidfs pseudo filesystem");
pidfs_sb = pidfs_mnt->mnt_sb;
}
bool is_pidfs_sb(const struct super_block *sb)