pidns: Make the pidns proc mount/umount logic obvious.
Track the number of pids in the proc hash table. When the number of pids goes to 0 schedule work to unmount the kernel mount of proc. Move the mount of proc into alloc_pid when we allocate the pid for init. Remove the surprising calls of pid_ns_release proc in fork and proc_flush_task. Those code paths really shouldn't know about proc namespace implementation details and people have demonstrated several times that finding and understanding those code paths is difficult and non-obvious. Because of the call path detach pid is alwasy called with the rtnl_lock held free_pid is not allowed to sleep, so the work to unmounting proc is moved to a work queue. This has the side benefit of not blocking the entire world waiting for the unnecessary rcu_barrier in deactivate_locked_super. In the process of making the code clear and obvious this fixes a bug reported by Gao feng <gaofeng@cn.fujitsu.com> where we would leak a mount of proc during clone(CLONE_NEWPID|CLONE_NEWNET) if copy_pid_ns succeeded and copy_net_ns failed. Acked-by: "Serge E. Hallyn" <serge@hallyn.com> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
This commit is contained in:
@@ -72,6 +72,12 @@ err_alloc:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void proc_cleanup_work(struct work_struct *work)
|
||||
{
|
||||
struct pid_namespace *ns = container_of(work, struct pid_namespace, proc_work);
|
||||
pid_ns_release_proc(ns);
|
||||
}
|
||||
|
||||
/* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */
|
||||
#define MAX_PID_NS_LEVEL 32
|
||||
|
||||
@@ -105,6 +111,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
|
||||
ns->level = level;
|
||||
ns->parent = get_pid_ns(parent_pid_ns);
|
||||
ns->user_ns = get_user_ns(user_ns);
|
||||
INIT_WORK(&ns->proc_work, proc_cleanup_work);
|
||||
|
||||
set_bit(0, ns->pidmap[0].page);
|
||||
atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
|
||||
@@ -112,15 +119,8 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
|
||||
for (i = 1; i < PIDMAP_ENTRIES; i++)
|
||||
atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
|
||||
|
||||
err = pid_ns_prepare_proc(ns);
|
||||
if (err)
|
||||
goto out_put_parent_pid_ns;
|
||||
|
||||
return ns;
|
||||
|
||||
out_put_parent_pid_ns:
|
||||
put_pid_ns(parent_pid_ns);
|
||||
put_user_ns(user_ns);
|
||||
out_free_map:
|
||||
kfree(ns->pidmap[0].page);
|
||||
out_free:
|
||||
|
||||
Reference in New Issue
Block a user