a79a908fd2
Introduce the ability to create a new cgroup namespace. The newly created cgroup namespace remembers the cgroup of the process at the point of creation of the cgroup namespace (referred to as cgroupns-root). The main purpose of a cgroup namespace is to virtualize the contents of the /proc/self/cgroup file. Processes inside a cgroup namespace are only able to see paths relative to their namespace root (unless they are moved outside of their cgroupns-root, at which point they will see a relative path from their cgroupns-root). For a correctly set-up container this enables container tools (like libcontainer, lxc, lmctfy, etc.) to create completely virtualized containers without leaking the system-level cgroup hierarchy to the task. This patch only implements the 'unshare' part of the cgroupns. Signed-off-by: Aditya Kali <adityakali@google.com> Signed-off-by: Serge Hallyn <serge.hallyn@canonical.com> Signed-off-by: Tejun Heo <tj@kernel.org>
180 lines
4.1 KiB
C
180 lines
4.1 KiB
C
#include <linux/proc_fs.h>
|
|
#include <linux/nsproxy.h>
|
|
#include <linux/ptrace.h>
|
|
#include <linux/namei.h>
|
|
#include <linux/file.h>
|
|
#include <linux/utsname.h>
|
|
#include <net/net_namespace.h>
|
|
#include <linux/ipc_namespace.h>
|
|
#include <linux/pid_namespace.h>
|
|
#include <linux/user_namespace.h>
|
|
#include "internal.h"
|
|
|
|
|
|
static const struct proc_ns_operations *ns_entries[] = {
|
|
#ifdef CONFIG_NET_NS
|
|
&netns_operations,
|
|
#endif
|
|
#ifdef CONFIG_UTS_NS
|
|
&utsns_operations,
|
|
#endif
|
|
#ifdef CONFIG_IPC_NS
|
|
&ipcns_operations,
|
|
#endif
|
|
#ifdef CONFIG_PID_NS
|
|
&pidns_operations,
|
|
#endif
|
|
#ifdef CONFIG_USER_NS
|
|
&userns_operations,
|
|
#endif
|
|
&mntns_operations,
|
|
#ifdef CONFIG_CGROUPS
|
|
&cgroupns_operations,
|
|
#endif
|
|
};
|
|
|
|
static const char *proc_ns_get_link(struct dentry *dentry,
|
|
struct inode *inode,
|
|
struct delayed_call *done)
|
|
{
|
|
const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
|
|
struct task_struct *task;
|
|
struct path ns_path;
|
|
void *error = ERR_PTR(-EACCES);
|
|
|
|
if (!dentry)
|
|
return ERR_PTR(-ECHILD);
|
|
|
|
task = get_proc_task(inode);
|
|
if (!task)
|
|
return error;
|
|
|
|
if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
|
|
error = ns_get_path(&ns_path, task, ns_ops);
|
|
if (!error)
|
|
nd_jump_link(&ns_path);
|
|
}
|
|
put_task_struct(task);
|
|
return error;
|
|
}
|
|
|
|
/*
 * ->readlink handler for a namespace symlink.
 *
 * Builds the link text with ns_get_name() into a local buffer and copies
 * it to the user buffer with readlink_copy().
 *
 * Returns -EACCES when the task is gone or the caller fails the
 * ptrace_may_access() check, a negative value from ns_get_name() on
 * failure, and otherwise the result of readlink_copy().
 */
static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
	struct inode *inode = d_inode(dentry);
	const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
	struct task_struct *tsk;
	char link[50];
	int ret;

	tsk = get_proc_task(inode);
	if (tsk == NULL)
		return -EACCES;

	ret = -EACCES;
	if (!ptrace_may_access(tsk, PTRACE_MODE_READ_FSCREDS))
		goto out_put;

	ret = ns_get_name(link, sizeof(link), tsk, ns_ops);
	if (ret < 0)
		goto out_put;

	ret = readlink_copy(buffer, buflen, link);

out_put:
	/* Drop the reference taken by get_proc_task(). */
	put_task_struct(tsk);
	return ret;
}
|
|
|
|
static const struct inode_operations proc_ns_link_inode_operations = {
|
|
.readlink = proc_ns_readlink,
|
|
.get_link = proc_ns_get_link,
|
|
.setattr = proc_setattr,
|
|
};
|
|
|
|
static int proc_ns_instantiate(struct inode *dir,
|
|
struct dentry *dentry, struct task_struct *task, const void *ptr)
|
|
{
|
|
const struct proc_ns_operations *ns_ops = ptr;
|
|
struct inode *inode;
|
|
struct proc_inode *ei;
|
|
|
|
inode = proc_pid_make_inode(dir->i_sb, task);
|
|
if (!inode)
|
|
goto out;
|
|
|
|
ei = PROC_I(inode);
|
|
inode->i_mode = S_IFLNK|S_IRWXUGO;
|
|
inode->i_op = &proc_ns_link_inode_operations;
|
|
ei->ns_ops = ns_ops;
|
|
|
|
d_set_d_op(dentry, &pid_dentry_operations);
|
|
d_add(dentry, inode);
|
|
/* Close the race of the process dying before we return the dentry */
|
|
if (pid_revalidate(dentry, 0))
|
|
return 0;
|
|
out:
|
|
return -ENOENT;
|
|
}
|
|
|
|
static int proc_ns_dir_readdir(struct file *file, struct dir_context *ctx)
|
|
{
|
|
struct task_struct *task = get_proc_task(file_inode(file));
|
|
const struct proc_ns_operations **entry, **last;
|
|
|
|
if (!task)
|
|
return -ENOENT;
|
|
|
|
if (!dir_emit_dots(file, ctx))
|
|
goto out;
|
|
if (ctx->pos >= 2 + ARRAY_SIZE(ns_entries))
|
|
goto out;
|
|
entry = ns_entries + (ctx->pos - 2);
|
|
last = &ns_entries[ARRAY_SIZE(ns_entries) - 1];
|
|
while (entry <= last) {
|
|
const struct proc_ns_operations *ops = *entry;
|
|
if (!proc_fill_cache(file, ctx, ops->name, strlen(ops->name),
|
|
proc_ns_instantiate, task, ops))
|
|
break;
|
|
ctx->pos++;
|
|
entry++;
|
|
}
|
|
out:
|
|
put_task_struct(task);
|
|
return 0;
|
|
}
|
|
|
|
const struct file_operations proc_ns_dir_operations = {
|
|
.read = generic_read_dir,
|
|
.iterate = proc_ns_dir_readdir,
|
|
};
|
|
|
|
static struct dentry *proc_ns_dir_lookup(struct inode *dir,
|
|
struct dentry *dentry, unsigned int flags)
|
|
{
|
|
int error;
|
|
struct task_struct *task = get_proc_task(dir);
|
|
const struct proc_ns_operations **entry, **last;
|
|
unsigned int len = dentry->d_name.len;
|
|
|
|
error = -ENOENT;
|
|
|
|
if (!task)
|
|
goto out_no_task;
|
|
|
|
last = &ns_entries[ARRAY_SIZE(ns_entries)];
|
|
for (entry = ns_entries; entry < last; entry++) {
|
|
if (strlen((*entry)->name) != len)
|
|
continue;
|
|
if (!memcmp(dentry->d_name.name, (*entry)->name, len))
|
|
break;
|
|
}
|
|
if (entry == last)
|
|
goto out;
|
|
|
|
error = proc_ns_instantiate(dir, dentry, task, *entry);
|
|
out:
|
|
put_task_struct(task);
|
|
out_no_task:
|
|
return ERR_PTR(error);
|
|
}
|
|
|
|
const struct inode_operations proc_ns_dir_inode_operations = {
|
|
.lookup = proc_ns_dir_lookup,
|
|
.getattr = pid_getattr,
|
|
.setattr = proc_setattr,
|
|
};
|