mirror of
https://github.com/torvalds/linux.git
synced 2024-11-26 06:02:05 +00:00
vfs: fix dentry RCU to refcounting possibly sleeping dput()
This is the fix that the last two commits indirectly led up to - making sure that we don't call dput() in a bad context on the dentries we've looked up in RCU mode after the sequence count validation fails. This basically expands d_rcu_to_refcount() into the callers, and then fixes the callers to delay the dput() in the failure case until _after_ we've dropped all locks and are no longer in an RCU-locked region. The case of 'complete_walk()' was trivial, since its failure case did the unlock_rcu_walk() directly after the call to d_rcu_to_refcount(), and as such that is just a pure expansion of the function with a trivial movement of the resulting dput() to after 'unlock_rcu_walk()'. In contrast, the unlazy_walk() case was much more complicated, because not only does it convert two different dentries from RCU to be reference counted, but it used to not call unlock_rcu_walk() at all, and instead just returned an error and let the caller clean everything up in "terminate_walk()". Happily, one of the dentries in question (called "parent" inside unlazy_walk()) is the dentry of "nd->path", which terminate_walk() wants a refcount for anyway in the non-RCU case. So what the new and improved unlazy_walk() does is to first turn that dentry into a refcounted one, and once that is set up, the error cases can continue to use the terminate_walk() helper for cleanup, but for the non-RCU case. Which makes it possible to drop out of RCU mode if we actually hit the sequence number failure case. Acked-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
0d98439ea3
commit
e5c832d555
102
fs/namei.c
102
fs/namei.c
@ -494,37 +494,6 @@ static inline void unlock_rcu_walk(void)
|
|||||||
br_read_unlock(&vfsmount_lock);
|
br_read_unlock(&vfsmount_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* When we move over from the RCU domain to properly refcounted
|
|
||||||
* long-lived dentries, we need to check the sequence numbers
|
|
||||||
* we got before lookup very carefully.
|
|
||||||
*
|
|
||||||
* We cannot blindly increment a dentry refcount - even if it
|
|
||||||
* is not locked - if it is zero, because it may have gone
|
|
||||||
* through the final d_kill() logic already.
|
|
||||||
*
|
|
||||||
* So for a zero refcount, we need to get the spinlock (which is
|
|
||||||
* safe even for a dead dentry because the de-allocation is
|
|
||||||
* RCU-delayed), and check the sequence count under the lock.
|
|
||||||
*
|
|
||||||
* Once we have checked the sequence count, we know it is live,
|
|
||||||
* and since we hold the spinlock it cannot die from under us.
|
|
||||||
*
|
|
||||||
* In contrast, if the reference count wasn't zero, we can just
|
|
||||||
* increment the lockref without having to take the spinlock.
|
|
||||||
* Even if the sequence number ends up being stale, we haven't
|
|
||||||
* gone through the final dput() and killed the dentry yet.
|
|
||||||
*/
|
|
||||||
static inline int d_rcu_to_refcount(struct dentry *dentry, seqcount_t *validate, unsigned seq)
|
|
||||||
{
|
|
||||||
if (likely(lockref_get_not_dead(&dentry->d_lockref))) {
|
|
||||||
if (!read_seqcount_retry(validate, seq))
|
|
||||||
return 0;
|
|
||||||
dput(dentry);
|
|
||||||
}
|
|
||||||
return -ECHILD;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* unlazy_walk - try to switch to ref-walk mode.
|
* unlazy_walk - try to switch to ref-walk mode.
|
||||||
* @nd: nameidata pathwalk data
|
* @nd: nameidata pathwalk data
|
||||||
@ -539,16 +508,29 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
|
|||||||
{
|
{
|
||||||
struct fs_struct *fs = current->fs;
|
struct fs_struct *fs = current->fs;
|
||||||
struct dentry *parent = nd->path.dentry;
|
struct dentry *parent = nd->path.dentry;
|
||||||
int want_root = 0;
|
|
||||||
|
|
||||||
BUG_ON(!(nd->flags & LOOKUP_RCU));
|
BUG_ON(!(nd->flags & LOOKUP_RCU));
|
||||||
if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
|
|
||||||
want_root = 1;
|
/*
|
||||||
spin_lock(&fs->lock);
|
* Get a reference to the parent first: we're
|
||||||
if (nd->root.mnt != fs->root.mnt ||
|
* going to make "path_put(nd->path)" valid in
|
||||||
nd->root.dentry != fs->root.dentry)
|
* non-RCU context for "terminate_walk()".
|
||||||
goto err_root;
|
*
|
||||||
}
|
* If this doesn't work, return immediately with
|
||||||
|
* RCU walking still active (and then we will do
|
||||||
|
* the RCU walk cleanup in terminate_walk()).
|
||||||
|
*/
|
||||||
|
if (!lockref_get_not_dead(&parent->d_lockref))
|
||||||
|
return -ECHILD;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* After the mntget(), we terminate_walk() will do
|
||||||
|
* the right thing for non-RCU mode, and all our
|
||||||
|
* subsequent exit cases should unlock_rcu_walk()
|
||||||
|
* before returning.
|
||||||
|
*/
|
||||||
|
mntget(nd->path.mnt);
|
||||||
|
nd->flags &= ~LOOKUP_RCU;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For a negative lookup, the lookup sequence point is the parents
|
* For a negative lookup, the lookup sequence point is the parents
|
||||||
@ -562,30 +544,39 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
|
|||||||
* be valid if the child sequence number is still valid.
|
* be valid if the child sequence number is still valid.
|
||||||
*/
|
*/
|
||||||
if (!dentry) {
|
if (!dentry) {
|
||||||
if (d_rcu_to_refcount(parent, &parent->d_seq, nd->seq) < 0)
|
if (read_seqcount_retry(&parent->d_seq, nd->seq))
|
||||||
goto err_root;
|
goto out;
|
||||||
BUG_ON(nd->inode != parent->d_inode);
|
BUG_ON(nd->inode != parent->d_inode);
|
||||||
} else {
|
} else {
|
||||||
if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0)
|
if (!lockref_get_not_dead(&dentry->d_lockref))
|
||||||
goto err_root;
|
goto out;
|
||||||
if (d_rcu_to_refcount(parent, &dentry->d_seq, nd->seq) < 0)
|
if (read_seqcount_retry(&dentry->d_seq, nd->seq))
|
||||||
goto err_parent;
|
goto drop_dentry;
|
||||||
}
|
}
|
||||||
if (want_root) {
|
|
||||||
|
/*
|
||||||
|
* Sequence counts matched. Now make sure that the root is
|
||||||
|
* still valid and get it if required.
|
||||||
|
*/
|
||||||
|
if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
|
||||||
|
spin_lock(&fs->lock);
|
||||||
|
if (nd->root.mnt != fs->root.mnt || nd->root.dentry != fs->root.dentry)
|
||||||
|
goto unlock_and_drop_dentry;
|
||||||
path_get(&nd->root);
|
path_get(&nd->root);
|
||||||
spin_unlock(&fs->lock);
|
spin_unlock(&fs->lock);
|
||||||
}
|
}
|
||||||
mntget(nd->path.mnt);
|
|
||||||
|
|
||||||
unlock_rcu_walk();
|
unlock_rcu_walk();
|
||||||
nd->flags &= ~LOOKUP_RCU;
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
err_parent:
|
unlock_and_drop_dentry:
|
||||||
|
spin_unlock(&fs->lock);
|
||||||
|
drop_dentry:
|
||||||
|
unlock_rcu_walk();
|
||||||
dput(dentry);
|
dput(dentry);
|
||||||
err_root:
|
return -ECHILD;
|
||||||
if (want_root)
|
out:
|
||||||
spin_unlock(&fs->lock);
|
unlock_rcu_walk();
|
||||||
return -ECHILD;
|
return -ECHILD;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -614,10 +605,15 @@ static int complete_walk(struct nameidata *nd)
|
|||||||
if (!(nd->flags & LOOKUP_ROOT))
|
if (!(nd->flags & LOOKUP_ROOT))
|
||||||
nd->root.mnt = NULL;
|
nd->root.mnt = NULL;
|
||||||
|
|
||||||
if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0) {
|
if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) {
|
||||||
unlock_rcu_walk();
|
unlock_rcu_walk();
|
||||||
return -ECHILD;
|
return -ECHILD;
|
||||||
}
|
}
|
||||||
|
if (read_seqcount_retry(&dentry->d_seq, nd->seq)) {
|
||||||
|
unlock_rcu_walk();
|
||||||
|
dput(dentry);
|
||||||
|
return -ECHILD;
|
||||||
|
}
|
||||||
mntget(nd->path.mnt);
|
mntget(nd->path.mnt);
|
||||||
unlock_rcu_walk();
|
unlock_rcu_walk();
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user