cgroups: read-write lock CLONE_THREAD forking per threadgroup

Add functionality to read-lock or write-lock CLONE_THREAD fork()ing on a per-threadgroup basis.

Add an rwsem that lives in a threadgroup's signal_struct that's taken for
reading in the fork path, under CONFIG_CGROUPS.  If another part of the
kernel later wants to use such a locking mechanism, the CONFIG_CGROUPS
ifdefs should be changed to a higher-up flag that CGROUPS and the other
system would both depend on.

This is a pre-patch for cgroup-procs-write.patch.

Signed-off-by: Ben Blum <bblum@andrew.cmu.edu>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Matt Helsley <matthltc@us.ibm.com>
Reviewed-by: Paul Menage <menage@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Ben Blum 2011-05-26 16:25:18 -07:00 committed by Linus Torvalds
parent dcb3a08e69
commit 4714d1d32d
3 changed files with 55 additions and 0 deletions

View File

@ -22,6 +22,14 @@
extern struct files_struct init_files; extern struct files_struct init_files;
extern struct fs_struct init_fs; extern struct fs_struct init_fs;
/*
 * INIT_THREADGROUP_FORK_LOCK(sig) - designated-initializer fragment for the
 * threadgroup_fork_lock member of the statically initialized signal struct
 * named by @sig.
 *
 * With CONFIG_CGROUPS the fragment initializes the rwsem and ends in a
 * trailing comma, so it can be placed directly inside an initializer list.
 * Without CONFIG_CGROUPS the member does not exist and the macro expands to
 * nothing.
 */
#ifndef CONFIG_CGROUPS
#define INIT_THREADGROUP_FORK_LOCK(sig)
#else
#define INIT_THREADGROUP_FORK_LOCK(sig)					\
	.threadgroup_fork_lock = __RWSEM_INITIALIZER(sig.threadgroup_fork_lock),
#endif
#define INIT_SIGNALS(sig) { \ #define INIT_SIGNALS(sig) { \
.nr_threads = 1, \ .nr_threads = 1, \
.wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\ .wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\
@ -38,6 +46,7 @@ extern struct fs_struct init_fs;
}, \ }, \
.cred_guard_mutex = \ .cred_guard_mutex = \
__MUTEX_INITIALIZER(sig.cred_guard_mutex), \ __MUTEX_INITIALIZER(sig.cred_guard_mutex), \
INIT_THREADGROUP_FORK_LOCK(sig) \
} }
extern struct nsproxy init_nsproxy; extern struct nsproxy init_nsproxy;

View File

@ -513,6 +513,7 @@ struct thread_group_cputimer {
spinlock_t lock; spinlock_t lock;
}; };
#include <linux/rwsem.h>
struct autogroup; struct autogroup;
/* /*
@ -632,6 +633,16 @@ struct signal_struct {
unsigned audit_tty; unsigned audit_tty;
struct tty_audit_buf *tty_audit_buf; struct tty_audit_buf *tty_audit_buf;
#endif #endif
#ifdef CONFIG_CGROUPS
/*
* The threadgroup_fork_lock prevents threads from forking with
* CLONE_THREAD while held for writing. Use this for fork-sensitive
* threadgroup-wide operations. It's taken for reading in fork.c in
* copy_process().
* Currently only needed write-side by cgroups.
*/
struct rw_semaphore threadgroup_fork_lock;
#endif
int oom_adj; /* OOM kill score adjustment (bit shift) */ int oom_adj; /* OOM kill score adjustment (bit shift) */
int oom_score_adj; /* OOM kill score adjustment */ int oom_score_adj; /* OOM kill score adjustment */
@ -2323,6 +2334,31 @@ static inline void unlock_task_sighand(struct task_struct *tsk,
spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); spin_unlock_irqrestore(&tsk->sighand->siglock, *flags);
} }
/*
 * Helpers for the per-threadgroup fork lock; see the declaration of
 * threadgroup_fork_lock in signal_struct for what the lock protects.
 *
 * Each helper acts on the rw_semaphore found at
 * tsk->signal->threadgroup_fork_lock: the read pair takes and releases it
 * shared, the write pair takes and releases it exclusively. When
 * CONFIG_CGROUPS is not set the semaphore does not exist and all four
 * helpers are empty inline functions.
 */
#ifdef CONFIG_CGROUPS
static inline void threadgroup_fork_read_lock(struct task_struct *tsk)
{
	struct rw_semaphore *fork_sem = &tsk->signal->threadgroup_fork_lock;

	down_read(fork_sem);
}

static inline void threadgroup_fork_read_unlock(struct task_struct *tsk)
{
	struct rw_semaphore *fork_sem = &tsk->signal->threadgroup_fork_lock;

	up_read(fork_sem);
}

static inline void threadgroup_fork_write_lock(struct task_struct *tsk)
{
	struct rw_semaphore *fork_sem = &tsk->signal->threadgroup_fork_lock;

	down_write(fork_sem);
}

static inline void threadgroup_fork_write_unlock(struct task_struct *tsk)
{
	struct rw_semaphore *fork_sem = &tsk->signal->threadgroup_fork_lock;

	up_write(fork_sem);
}
#else /* !CONFIG_CGROUPS */
static inline void threadgroup_fork_read_lock(struct task_struct *tsk)
{
}

static inline void threadgroup_fork_read_unlock(struct task_struct *tsk)
{
}

static inline void threadgroup_fork_write_lock(struct task_struct *tsk)
{
}

static inline void threadgroup_fork_write_unlock(struct task_struct *tsk)
{
}
#endif /* CONFIG_CGROUPS */
#ifndef __HAVE_THREAD_FUNCTIONS #ifndef __HAVE_THREAD_FUNCTIONS
#define task_thread_info(task) ((struct thread_info *)(task)->stack) #define task_thread_info(task) ((struct thread_info *)(task)->stack)

View File

@ -957,6 +957,10 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
tty_audit_fork(sig); tty_audit_fork(sig);
sched_autogroup_fork(sig); sched_autogroup_fork(sig);
#ifdef CONFIG_CGROUPS
init_rwsem(&sig->threadgroup_fork_lock);
#endif
sig->oom_adj = current->signal->oom_adj; sig->oom_adj = current->signal->oom_adj;
sig->oom_score_adj = current->signal->oom_score_adj; sig->oom_score_adj = current->signal->oom_score_adj;
sig->oom_score_adj_min = current->signal->oom_score_adj_min; sig->oom_score_adj_min = current->signal->oom_score_adj_min;
@ -1138,6 +1142,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
monotonic_to_bootbased(&p->real_start_time); monotonic_to_bootbased(&p->real_start_time);
p->io_context = NULL; p->io_context = NULL;
p->audit_context = NULL; p->audit_context = NULL;
if (clone_flags & CLONE_THREAD)
threadgroup_fork_read_lock(current);
cgroup_fork(p); cgroup_fork(p);
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
p->mempolicy = mpol_dup(p->mempolicy); p->mempolicy = mpol_dup(p->mempolicy);
@ -1342,6 +1348,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
write_unlock_irq(&tasklist_lock); write_unlock_irq(&tasklist_lock);
proc_fork_connector(p); proc_fork_connector(p);
cgroup_post_fork(p); cgroup_post_fork(p);
if (clone_flags & CLONE_THREAD)
threadgroup_fork_read_unlock(current);
perf_event_fork(p); perf_event_fork(p);
return p; return p;
@ -1380,6 +1388,8 @@ bad_fork_cleanup_policy:
mpol_put(p->mempolicy); mpol_put(p->mempolicy);
bad_fork_cleanup_cgroup: bad_fork_cleanup_cgroup:
#endif #endif
if (clone_flags & CLONE_THREAD)
threadgroup_fork_read_unlock(current);
cgroup_exit(p, cgroup_callbacks_done); cgroup_exit(p, cgroup_callbacks_done);
delayacct_tsk_free(p); delayacct_tsk_free(p);
module_put(task_thread_info(p)->exec_domain->module); module_put(task_thread_info(p)->exec_domain->module);