The pid_ns_for_children set by a task is known only to the task itself, and it is impossible to identify it from outside. This is a big problem for checkpoint/restore software like CRIU, because it can't correctly handle tasks that call setns(CLONE_NEWPID) in the process of their work. This patch solves the problem by exposing pid_ns_for_children in the ns directory in the standard way, under the name "pid_for_children": ~# ls /proc/5531/ns -l | grep pid lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid -> pid:[4026531836] lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid_for_children -> pid:[4026532286] Link: http://lkml.kernel.org/r/149201123914.6007.2187327078064239572.stgit@localhost.localdomain Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com> Cc: Andrei Vagin <avagin@virtuozzo.com> Cc: Andreas Gruenbacher <agruenba@redhat.com> Cc: Kees Cook <keescook@chromium.org> Cc: Michael Kerrisk <mtk.manpages@googlemail.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Paul Moore <paul@paul-moore.com> Cc: Eric Biederman <ebiederm@xmission.com> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Ingo Molnar <mingo@kernel.org> Cc: Serge Hallyn <serge@hallyn.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
		
			
				
	
	
		
			181 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			181 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| #include <linux/proc_fs.h>
 | |
| #include <linux/nsproxy.h>
 | |
| #include <linux/ptrace.h>
 | |
| #include <linux/namei.h>
 | |
| #include <linux/file.h>
 | |
| #include <linux/utsname.h>
 | |
| #include <net/net_namespace.h>
 | |
| #include <linux/ipc_namespace.h>
 | |
| #include <linux/pid_namespace.h>
 | |
| #include <linux/user_namespace.h>
 | |
| #include "internal.h"
 | |
| 
 | |
| 
 | |
| static const struct proc_ns_operations *ns_entries[] = {
 | |
| #ifdef CONFIG_NET_NS
 | |
| 	&netns_operations,
 | |
| #endif
 | |
| #ifdef CONFIG_UTS_NS
 | |
| 	&utsns_operations,
 | |
| #endif
 | |
| #ifdef CONFIG_IPC_NS
 | |
| 	&ipcns_operations,
 | |
| #endif
 | |
| #ifdef CONFIG_PID_NS
 | |
| 	&pidns_operations,
 | |
| 	&pidns_for_children_operations,
 | |
| #endif
 | |
| #ifdef CONFIG_USER_NS
 | |
| 	&userns_operations,
 | |
| #endif
 | |
| 	&mntns_operations,
 | |
| #ifdef CONFIG_CGROUPS
 | |
| 	&cgroupns_operations,
 | |
| #endif
 | |
| };
 | |
| 
 | |
| static const char *proc_ns_get_link(struct dentry *dentry,
 | |
| 				    struct inode *inode,
 | |
| 				    struct delayed_call *done)
 | |
| {
 | |
| 	const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
 | |
| 	struct task_struct *task;
 | |
| 	struct path ns_path;
 | |
| 	void *error = ERR_PTR(-EACCES);
 | |
| 
 | |
| 	if (!dentry)
 | |
| 		return ERR_PTR(-ECHILD);
 | |
| 
 | |
| 	task = get_proc_task(inode);
 | |
| 	if (!task)
 | |
| 		return error;
 | |
| 
 | |
| 	if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
 | |
| 		error = ns_get_path(&ns_path, task, ns_ops);
 | |
| 		if (!error)
 | |
| 			nd_jump_link(&ns_path);
 | |
| 	}
 | |
| 	put_task_struct(task);
 | |
| 	return error;
 | |
| }
 | |
| 
 | |
| static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen)
 | |
| {
 | |
| 	struct inode *inode = d_inode(dentry);
 | |
| 	const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
 | |
| 	struct task_struct *task;
 | |
| 	char name[50];
 | |
| 	int res = -EACCES;
 | |
| 
 | |
| 	task = get_proc_task(inode);
 | |
| 	if (!task)
 | |
| 		return res;
 | |
| 
 | |
| 	if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
 | |
| 		res = ns_get_name(name, sizeof(name), task, ns_ops);
 | |
| 		if (res >= 0)
 | |
| 			res = readlink_copy(buffer, buflen, name);
 | |
| 	}
 | |
| 	put_task_struct(task);
 | |
| 	return res;
 | |
| }
 | |
| 
 | |
| static const struct inode_operations proc_ns_link_inode_operations = {
 | |
| 	.readlink	= proc_ns_readlink,
 | |
| 	.get_link	= proc_ns_get_link,
 | |
| 	.setattr	= proc_setattr,
 | |
| };
 | |
| 
 | |
| static int proc_ns_instantiate(struct inode *dir,
 | |
| 	struct dentry *dentry, struct task_struct *task, const void *ptr)
 | |
| {
 | |
| 	const struct proc_ns_operations *ns_ops = ptr;
 | |
| 	struct inode *inode;
 | |
| 	struct proc_inode *ei;
 | |
| 
 | |
| 	inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK | S_IRWXUGO);
 | |
| 	if (!inode)
 | |
| 		goto out;
 | |
| 
 | |
| 	ei = PROC_I(inode);
 | |
| 	inode->i_op = &proc_ns_link_inode_operations;
 | |
| 	ei->ns_ops = ns_ops;
 | |
| 
 | |
| 	d_set_d_op(dentry, &pid_dentry_operations);
 | |
| 	d_add(dentry, inode);
 | |
| 	/* Close the race of the process dying before we return the dentry */
 | |
| 	if (pid_revalidate(dentry, 0))
 | |
| 		return 0;
 | |
| out:
 | |
| 	return -ENOENT;
 | |
| }
 | |
| 
 | |
| static int proc_ns_dir_readdir(struct file *file, struct dir_context *ctx)
 | |
| {
 | |
| 	struct task_struct *task = get_proc_task(file_inode(file));
 | |
| 	const struct proc_ns_operations **entry, **last;
 | |
| 
 | |
| 	if (!task)
 | |
| 		return -ENOENT;
 | |
| 
 | |
| 	if (!dir_emit_dots(file, ctx))
 | |
| 		goto out;
 | |
| 	if (ctx->pos >= 2 + ARRAY_SIZE(ns_entries))
 | |
| 		goto out;
 | |
| 	entry = ns_entries + (ctx->pos - 2);
 | |
| 	last = &ns_entries[ARRAY_SIZE(ns_entries) - 1];
 | |
| 	while (entry <= last) {
 | |
| 		const struct proc_ns_operations *ops = *entry;
 | |
| 		if (!proc_fill_cache(file, ctx, ops->name, strlen(ops->name),
 | |
| 				     proc_ns_instantiate, task, ops))
 | |
| 			break;
 | |
| 		ctx->pos++;
 | |
| 		entry++;
 | |
| 	}
 | |
| out:
 | |
| 	put_task_struct(task);
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| const struct file_operations proc_ns_dir_operations = {
 | |
| 	.read		= generic_read_dir,
 | |
| 	.iterate_shared	= proc_ns_dir_readdir,
 | |
| 	.llseek		= generic_file_llseek,
 | |
| };
 | |
| 
 | |
| static struct dentry *proc_ns_dir_lookup(struct inode *dir,
 | |
| 				struct dentry *dentry, unsigned int flags)
 | |
| {
 | |
| 	int error;
 | |
| 	struct task_struct *task = get_proc_task(dir);
 | |
| 	const struct proc_ns_operations **entry, **last;
 | |
| 	unsigned int len = dentry->d_name.len;
 | |
| 
 | |
| 	error = -ENOENT;
 | |
| 
 | |
| 	if (!task)
 | |
| 		goto out_no_task;
 | |
| 
 | |
| 	last = &ns_entries[ARRAY_SIZE(ns_entries)];
 | |
| 	for (entry = ns_entries; entry < last; entry++) {
 | |
| 		if (strlen((*entry)->name) != len)
 | |
| 			continue;
 | |
| 		if (!memcmp(dentry->d_name.name, (*entry)->name, len))
 | |
| 			break;
 | |
| 	}
 | |
| 	if (entry == last)
 | |
| 		goto out;
 | |
| 
 | |
| 	error = proc_ns_instantiate(dir, dentry, task, *entry);
 | |
| out:
 | |
| 	put_task_struct(task);
 | |
| out_no_task:
 | |
| 	return ERR_PTR(error);
 | |
| }
 | |
| 
 | |
| const struct inode_operations proc_ns_dir_inode_operations = {
 | |
| 	.lookup		= proc_ns_dir_lookup,
 | |
| 	.getattr	= pid_getattr,
 | |
| 	.setattr	= proc_setattr,
 | |
| };
 |