forked from Minki/linux
eaa0d190bf
pid_ns_for_children set by a task is known only to the task itself, and it's impossible to identify it from outside. It's a big problem for checkpoint/restore software like CRIU, because it can't correctly handle tasks that do setns(CLONE_NEWPID) in the process of their work. This patch solves the problem, and it exposes pid_ns_for_children to the ns directory in the standard way with the name "pid_for_children": ~# ls /proc/5531/ns -l | grep pid lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid -> pid:[4026531836] lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid_for_children -> pid:[4026532286] Link: http://lkml.kernel.org/r/149201123914.6007.2187327078064239572.stgit@localhost.localdomain Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com> Cc: Andrei Vagin <avagin@virtuozzo.com> Cc: Andreas Gruenbacher <agruenba@redhat.com> Cc: Kees Cook <keescook@chromium.org> Cc: Michael Kerrisk <mtk.manpages@googlemail.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Paul Moore <paul@paul-moore.com> Cc: Eric Biederman <ebiederm@xmission.com> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Ingo Molnar <mingo@kernel.org> Cc: Serge Hallyn <serge@hallyn.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
181 lines
4.2 KiB
C
181 lines
4.2 KiB
C
#include <linux/proc_fs.h>
|
|
#include <linux/nsproxy.h>
|
|
#include <linux/ptrace.h>
|
|
#include <linux/namei.h>
|
|
#include <linux/file.h>
|
|
#include <linux/utsname.h>
|
|
#include <net/net_namespace.h>
|
|
#include <linux/ipc_namespace.h>
|
|
#include <linux/pid_namespace.h>
|
|
#include <linux/user_namespace.h>
|
|
#include "internal.h"
|
|
|
|
|
|
static const struct proc_ns_operations *ns_entries[] = {
|
|
#ifdef CONFIG_NET_NS
|
|
&netns_operations,
|
|
#endif
|
|
#ifdef CONFIG_UTS_NS
|
|
&utsns_operations,
|
|
#endif
|
|
#ifdef CONFIG_IPC_NS
|
|
&ipcns_operations,
|
|
#endif
|
|
#ifdef CONFIG_PID_NS
|
|
&pidns_operations,
|
|
&pidns_for_children_operations,
|
|
#endif
|
|
#ifdef CONFIG_USER_NS
|
|
&userns_operations,
|
|
#endif
|
|
&mntns_operations,
|
|
#ifdef CONFIG_CGROUPS
|
|
&cgroupns_operations,
|
|
#endif
|
|
};
|
|
|
|
static const char *proc_ns_get_link(struct dentry *dentry,
|
|
struct inode *inode,
|
|
struct delayed_call *done)
|
|
{
|
|
const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
|
|
struct task_struct *task;
|
|
struct path ns_path;
|
|
void *error = ERR_PTR(-EACCES);
|
|
|
|
if (!dentry)
|
|
return ERR_PTR(-ECHILD);
|
|
|
|
task = get_proc_task(inode);
|
|
if (!task)
|
|
return error;
|
|
|
|
if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
|
|
error = ns_get_path(&ns_path, task, ns_ops);
|
|
if (!error)
|
|
nd_jump_link(&ns_path);
|
|
}
|
|
put_task_struct(task);
|
|
return error;
|
|
}
|
|
|
|
/*
 * ->readlink() handler for the namespace symlinks.
 *
 * @dentry: dentry of the link being read.
 * @buffer: user-space destination buffer.
 * @buflen: size of @buffer.
 *
 * Formats the link text with ns_get_name() into a local buffer and hands
 * it to readlink_copy().  Returns -EACCES when the task cannot be obtained
 * or ptrace_may_access() refuses PTRACE_MODE_READ_FSCREDS access, a
 * negative value from ns_get_name() if that fails, and otherwise the
 * result of readlink_copy().
 */
static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
	struct inode *inode = d_inode(dentry);
	const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
	struct task_struct *task = get_proc_task(inode);
	char link_name[50];
	int ret;

	if (!task)
		return -EACCES;

	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
		ret = -EACCES;
		goto out_put_task;
	}

	ret = ns_get_name(link_name, sizeof(link_name), task, ns_ops);
	if (ret < 0)
		goto out_put_task;

	ret = readlink_copy(buffer, buflen, link_name);

out_put_task:
	/* Drop the reference taken by get_proc_task(). */
	put_task_struct(task);
	return ret;
}
|
|
|
|
static const struct inode_operations proc_ns_link_inode_operations = {
|
|
.readlink = proc_ns_readlink,
|
|
.get_link = proc_ns_get_link,
|
|
.setattr = proc_setattr,
|
|
};
|
|
|
|
static int proc_ns_instantiate(struct inode *dir,
|
|
struct dentry *dentry, struct task_struct *task, const void *ptr)
|
|
{
|
|
const struct proc_ns_operations *ns_ops = ptr;
|
|
struct inode *inode;
|
|
struct proc_inode *ei;
|
|
|
|
inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK | S_IRWXUGO);
|
|
if (!inode)
|
|
goto out;
|
|
|
|
ei = PROC_I(inode);
|
|
inode->i_op = &proc_ns_link_inode_operations;
|
|
ei->ns_ops = ns_ops;
|
|
|
|
d_set_d_op(dentry, &pid_dentry_operations);
|
|
d_add(dentry, inode);
|
|
/* Close the race of the process dying before we return the dentry */
|
|
if (pid_revalidate(dentry, 0))
|
|
return 0;
|
|
out:
|
|
return -ENOENT;
|
|
}
|
|
|
|
static int proc_ns_dir_readdir(struct file *file, struct dir_context *ctx)
|
|
{
|
|
struct task_struct *task = get_proc_task(file_inode(file));
|
|
const struct proc_ns_operations **entry, **last;
|
|
|
|
if (!task)
|
|
return -ENOENT;
|
|
|
|
if (!dir_emit_dots(file, ctx))
|
|
goto out;
|
|
if (ctx->pos >= 2 + ARRAY_SIZE(ns_entries))
|
|
goto out;
|
|
entry = ns_entries + (ctx->pos - 2);
|
|
last = &ns_entries[ARRAY_SIZE(ns_entries) - 1];
|
|
while (entry <= last) {
|
|
const struct proc_ns_operations *ops = *entry;
|
|
if (!proc_fill_cache(file, ctx, ops->name, strlen(ops->name),
|
|
proc_ns_instantiate, task, ops))
|
|
break;
|
|
ctx->pos++;
|
|
entry++;
|
|
}
|
|
out:
|
|
put_task_struct(task);
|
|
return 0;
|
|
}
|
|
|
|
const struct file_operations proc_ns_dir_operations = {
|
|
.read = generic_read_dir,
|
|
.iterate_shared = proc_ns_dir_readdir,
|
|
.llseek = generic_file_llseek,
|
|
};
|
|
|
|
static struct dentry *proc_ns_dir_lookup(struct inode *dir,
|
|
struct dentry *dentry, unsigned int flags)
|
|
{
|
|
int error;
|
|
struct task_struct *task = get_proc_task(dir);
|
|
const struct proc_ns_operations **entry, **last;
|
|
unsigned int len = dentry->d_name.len;
|
|
|
|
error = -ENOENT;
|
|
|
|
if (!task)
|
|
goto out_no_task;
|
|
|
|
last = &ns_entries[ARRAY_SIZE(ns_entries)];
|
|
for (entry = ns_entries; entry < last; entry++) {
|
|
if (strlen((*entry)->name) != len)
|
|
continue;
|
|
if (!memcmp(dentry->d_name.name, (*entry)->name, len))
|
|
break;
|
|
}
|
|
if (entry == last)
|
|
goto out;
|
|
|
|
error = proc_ns_instantiate(dir, dentry, task, *entry);
|
|
out:
|
|
put_task_struct(task);
|
|
out_no_task:
|
|
return ERR_PTR(error);
|
|
}
|
|
|
|
const struct inode_operations proc_ns_dir_inode_operations = {
|
|
.lookup = proc_ns_dir_lookup,
|
|
.getattr = pid_getattr,
|
|
.setattr = proc_setattr,
|
|
};
|