forked from Minki/linux
vfs: fix dentry RCU to refcounting possibly sleeping dput()
This is the fix that the last two commits indirectly led up to - making sure that we don't call dput() in a bad context on the dentries we've looked up in RCU mode after the sequence count validation fails. This basically expands d_rcu_to_refcount() into the callers, and then fixes the callers to delay the dput() in the failure case until _after_ we've dropped all locks and are no longer in an RCU-locked region. The case of 'complete_walk()' was trivial, since its failure case did the unlock_rcu_walk() directly after the call to d_rcu_to_refcount(), and as such that is just a pure expansion of the function with a trivial movement of the resulting dput() to after 'unlock_rcu_walk()'. In contrast, the unlazy_walk() case was much more complicated, because not only does convert two different dentries from RCU to be reference counted, but it used to not call unlock_rcu_walk() at all, and instead just returned an error and let the caller clean everything up in "terminate_walk()". Happily, one of the dentries in question (called "parent" inside unlazy_walk()) is the dentry of "nd->path", which terminate_walk() wants a refcount to anyway for the non-RCU case. So what the new and improved unlazy_walk() does is to first turn that dentry into a refcounted one, and once that is set up, the error cases can continue to use the terminate_walk() helper for cleanup, but for the non-RCU case. Which makes it possible to drop out of RCU mode if we actually hit the sequence number failure case. Acked-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
0d98439ea3
commit
e5c832d555
102
fs/namei.c
102
fs/namei.c
@ -494,37 +494,6 @@ static inline void unlock_rcu_walk(void)
|
||||
br_read_unlock(&vfsmount_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* When we move over from the RCU domain to properly refcounted
|
||||
* long-lived dentries, we need to check the sequence numbers
|
||||
* we got before lookup very carefully.
|
||||
*
|
||||
* We cannot blindly increment a dentry refcount - even if it
|
||||
* is not locked - if it is zero, because it may have gone
|
||||
* through the final d_kill() logic already.
|
||||
*
|
||||
* So for a zero refcount, we need to get the spinlock (which is
|
||||
* safe even for a dead dentry because the de-allocation is
|
||||
* RCU-delayed), and check the sequence count under the lock.
|
||||
*
|
||||
* Once we have checked the sequence count, we know it is live,
|
||||
* and since we hold the spinlock it cannot die from under us.
|
||||
*
|
||||
* In contrast, if the reference count wasn't zero, we can just
|
||||
* increment the lockref without having to take the spinlock.
|
||||
* Even if the sequence number ends up being stale, we haven't
|
||||
* gone through the final dput() and killed the dentry yet.
|
||||
*/
|
||||
static inline int d_rcu_to_refcount(struct dentry *dentry, seqcount_t *validate, unsigned seq)
|
||||
{
|
||||
if (likely(lockref_get_not_dead(&dentry->d_lockref))) {
|
||||
if (!read_seqcount_retry(validate, seq))
|
||||
return 0;
|
||||
dput(dentry);
|
||||
}
|
||||
return -ECHILD;
|
||||
}
|
||||
|
||||
/**
|
||||
* unlazy_walk - try to switch to ref-walk mode.
|
||||
* @nd: nameidata pathwalk data
|
||||
@ -539,16 +508,29 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
|
||||
{
|
||||
struct fs_struct *fs = current->fs;
|
||||
struct dentry *parent = nd->path.dentry;
|
||||
int want_root = 0;
|
||||
|
||||
BUG_ON(!(nd->flags & LOOKUP_RCU));
|
||||
if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
|
||||
want_root = 1;
|
||||
spin_lock(&fs->lock);
|
||||
if (nd->root.mnt != fs->root.mnt ||
|
||||
nd->root.dentry != fs->root.dentry)
|
||||
goto err_root;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get a reference to the parent first: we're
|
||||
* going to make "path_put(nd->path)" valid in
|
||||
* non-RCU context for "terminate_walk()".
|
||||
*
|
||||
* If this doesn't work, return immediately with
|
||||
* RCU walking still active (and then we will do
|
||||
* the RCU walk cleanup in terminate_walk()).
|
||||
*/
|
||||
if (!lockref_get_not_dead(&parent->d_lockref))
|
||||
return -ECHILD;
|
||||
|
||||
/*
|
||||
* After the mntget(), we terminate_walk() will do
|
||||
* the right thing for non-RCU mode, and all our
|
||||
* subsequent exit cases should unlock_rcu_walk()
|
||||
* before returning.
|
||||
*/
|
||||
mntget(nd->path.mnt);
|
||||
nd->flags &= ~LOOKUP_RCU;
|
||||
|
||||
/*
|
||||
* For a negative lookup, the lookup sequence point is the parents
|
||||
@ -562,30 +544,39 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
|
||||
* be valid if the child sequence number is still valid.
|
||||
*/
|
||||
if (!dentry) {
|
||||
if (d_rcu_to_refcount(parent, &parent->d_seq, nd->seq) < 0)
|
||||
goto err_root;
|
||||
if (read_seqcount_retry(&parent->d_seq, nd->seq))
|
||||
goto out;
|
||||
BUG_ON(nd->inode != parent->d_inode);
|
||||
} else {
|
||||
if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0)
|
||||
goto err_root;
|
||||
if (d_rcu_to_refcount(parent, &dentry->d_seq, nd->seq) < 0)
|
||||
goto err_parent;
|
||||
if (!lockref_get_not_dead(&dentry->d_lockref))
|
||||
goto out;
|
||||
if (read_seqcount_retry(&dentry->d_seq, nd->seq))
|
||||
goto drop_dentry;
|
||||
}
|
||||
if (want_root) {
|
||||
|
||||
/*
|
||||
* Sequence counts matched. Now make sure that the root is
|
||||
* still valid and get it if required.
|
||||
*/
|
||||
if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
|
||||
spin_lock(&fs->lock);
|
||||
if (nd->root.mnt != fs->root.mnt || nd->root.dentry != fs->root.dentry)
|
||||
goto unlock_and_drop_dentry;
|
||||
path_get(&nd->root);
|
||||
spin_unlock(&fs->lock);
|
||||
}
|
||||
mntget(nd->path.mnt);
|
||||
|
||||
unlock_rcu_walk();
|
||||
nd->flags &= ~LOOKUP_RCU;
|
||||
return 0;
|
||||
|
||||
err_parent:
|
||||
unlock_and_drop_dentry:
|
||||
spin_unlock(&fs->lock);
|
||||
drop_dentry:
|
||||
unlock_rcu_walk();
|
||||
dput(dentry);
|
||||
err_root:
|
||||
if (want_root)
|
||||
spin_unlock(&fs->lock);
|
||||
return -ECHILD;
|
||||
out:
|
||||
unlock_rcu_walk();
|
||||
return -ECHILD;
|
||||
}
|
||||
|
||||
@ -614,10 +605,15 @@ static int complete_walk(struct nameidata *nd)
|
||||
if (!(nd->flags & LOOKUP_ROOT))
|
||||
nd->root.mnt = NULL;
|
||||
|
||||
if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0) {
|
||||
if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) {
|
||||
unlock_rcu_walk();
|
||||
return -ECHILD;
|
||||
}
|
||||
if (read_seqcount_retry(&dentry->d_seq, nd->seq)) {
|
||||
unlock_rcu_walk();
|
||||
dput(dentry);
|
||||
return -ECHILD;
|
||||
}
|
||||
mntget(nd->path.mnt);
|
||||
unlock_rcu_walk();
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user