forked from Minki/linux
Merge tag 'seccomp-3.17' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux into next
This commit is contained in:
commit
fd33c43677
10
MAINTAINERS
10
MAINTAINERS
@ -7953,6 +7953,16 @@ S: Maintained
|
||||
F: drivers/mmc/host/sdhci.*
|
||||
F: drivers/mmc/host/sdhci-pltfm.[ch]
|
||||
|
||||
SECURE COMPUTING
|
||||
M: Kees Cook <keescook@chromium.org>
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git seccomp
|
||||
S: Supported
|
||||
F: kernel/seccomp.c
|
||||
F: include/uapi/linux/seccomp.h
|
||||
F: include/linux/seccomp.h
|
||||
K: \bsecure_computing
|
||||
K: \bTIF_SECCOMP\b
|
||||
|
||||
SECURE DIGITAL HOST CONTROLLER INTERFACE, OPEN FIRMWARE BINDINGS (SDHCI-OF)
|
||||
M: Anton Vorontsov <anton@enomsg.org>
|
||||
L: linuxppc-dev@lists.ozlabs.org
|
||||
|
@ -321,6 +321,7 @@ config HAVE_ARCH_SECCOMP_FILTER
|
||||
- secure_computing is called from a ptrace_event()-safe context
|
||||
- secure_computing return value is checked and a return value of -1
|
||||
results in the system call being skipped immediately.
|
||||
- seccomp syscall wired up
|
||||
|
||||
config SECCOMP_FILTER
|
||||
def_bool y
|
||||
|
@ -409,6 +409,7 @@
|
||||
#define __NR_sched_setattr (__NR_SYSCALL_BASE+380)
|
||||
#define __NR_sched_getattr (__NR_SYSCALL_BASE+381)
|
||||
#define __NR_renameat2 (__NR_SYSCALL_BASE+382)
|
||||
#define __NR_seccomp (__NR_SYSCALL_BASE+383)
|
||||
|
||||
/*
|
||||
* This may need to be greater than __NR_last_syscall+1 in order to
|
||||
|
@ -392,6 +392,7 @@
|
||||
/* 380 */ CALL(sys_sched_setattr)
|
||||
CALL(sys_sched_getattr)
|
||||
CALL(sys_renameat2)
|
||||
CALL(sys_seccomp)
|
||||
#ifndef syscalls_counted
|
||||
.equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
|
||||
#define syscalls_counted
|
||||
|
@ -372,16 +372,17 @@
|
||||
#define __NR_sched_setattr (__NR_Linux + 349)
|
||||
#define __NR_sched_getattr (__NR_Linux + 350)
|
||||
#define __NR_renameat2 (__NR_Linux + 351)
|
||||
#define __NR_seccomp (__NR_Linux + 352)
|
||||
|
||||
/*
|
||||
* Offset of the last Linux o32 flavoured syscall
|
||||
*/
|
||||
#define __NR_Linux_syscalls 351
|
||||
#define __NR_Linux_syscalls 352
|
||||
|
||||
#endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
|
||||
|
||||
#define __NR_O32_Linux 4000
|
||||
#define __NR_O32_Linux_syscalls 351
|
||||
#define __NR_O32_Linux_syscalls 352
|
||||
|
||||
#if _MIPS_SIM == _MIPS_SIM_ABI64
|
||||
|
||||
@ -701,16 +702,17 @@
|
||||
#define __NR_sched_setattr (__NR_Linux + 309)
|
||||
#define __NR_sched_getattr (__NR_Linux + 310)
|
||||
#define __NR_renameat2 (__NR_Linux + 311)
|
||||
#define __NR_seccomp (__NR_Linux + 312)
|
||||
|
||||
/*
|
||||
* Offset of the last Linux 64-bit flavoured syscall
|
||||
*/
|
||||
#define __NR_Linux_syscalls 311
|
||||
#define __NR_Linux_syscalls 312
|
||||
|
||||
#endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
|
||||
|
||||
#define __NR_64_Linux 5000
|
||||
#define __NR_64_Linux_syscalls 311
|
||||
#define __NR_64_Linux_syscalls 312
|
||||
|
||||
#if _MIPS_SIM == _MIPS_SIM_NABI32
|
||||
|
||||
@ -1034,15 +1036,16 @@
|
||||
#define __NR_sched_setattr (__NR_Linux + 313)
|
||||
#define __NR_sched_getattr (__NR_Linux + 314)
|
||||
#define __NR_renameat2 (__NR_Linux + 315)
|
||||
#define __NR_seccomp (__NR_Linux + 316)
|
||||
|
||||
/*
|
||||
* Offset of the last N32 flavoured syscall
|
||||
*/
|
||||
#define __NR_Linux_syscalls 315
|
||||
#define __NR_Linux_syscalls 316
|
||||
|
||||
#endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
|
||||
|
||||
#define __NR_N32_Linux 6000
|
||||
#define __NR_N32_Linux_syscalls 315
|
||||
#define __NR_N32_Linux_syscalls 316
|
||||
|
||||
#endif /* _UAPI_ASM_UNISTD_H */
|
||||
|
@ -578,3 +578,4 @@ EXPORT(sys_call_table)
|
||||
PTR sys_sched_setattr
|
||||
PTR sys_sched_getattr /* 4350 */
|
||||
PTR sys_renameat2
|
||||
PTR sys_seccomp
|
||||
|
@ -431,4 +431,5 @@ EXPORT(sys_call_table)
|
||||
PTR sys_sched_setattr
|
||||
PTR sys_sched_getattr /* 5310 */
|
||||
PTR sys_renameat2
|
||||
PTR sys_seccomp
|
||||
.size sys_call_table,.-sys_call_table
|
||||
|
@ -424,4 +424,5 @@ EXPORT(sysn32_call_table)
|
||||
PTR sys_sched_setattr
|
||||
PTR sys_sched_getattr
|
||||
PTR sys_renameat2 /* 6315 */
|
||||
PTR sys_seccomp
|
||||
.size sysn32_call_table,.-sysn32_call_table
|
||||
|
@ -557,4 +557,5 @@ EXPORT(sys32_call_table)
|
||||
PTR sys_sched_setattr
|
||||
PTR sys_sched_getattr /* 4350 */
|
||||
PTR sys_renameat2
|
||||
PTR sys_seccomp
|
||||
.size sys32_call_table,.-sys32_call_table
|
||||
|
@ -360,3 +360,4 @@
|
||||
351 i386 sched_setattr sys_sched_setattr
|
||||
352 i386 sched_getattr sys_sched_getattr
|
||||
353 i386 renameat2 sys_renameat2
|
||||
354 i386 seccomp sys_seccomp
|
||||
|
@ -323,6 +323,7 @@
|
||||
314 common sched_setattr sys_sched_setattr
|
||||
315 common sched_getattr sys_sched_getattr
|
||||
316 common renameat2 sys_renameat2
|
||||
317 common seccomp sys_seccomp
|
||||
|
||||
#
|
||||
# x32-specific system call numbers start at 512 to avoid cache impact
|
||||
|
@ -1216,7 +1216,7 @@ EXPORT_SYMBOL(install_exec_creds);
|
||||
/*
|
||||
* determine how safe it is to execute the proposed program
|
||||
* - the caller must hold ->cred_guard_mutex to protect against
|
||||
* PTRACE_ATTACH
|
||||
* PTRACE_ATTACH or seccomp thread-sync
|
||||
*/
|
||||
static void check_unsafe_exec(struct linux_binprm *bprm)
|
||||
{
|
||||
@ -1234,7 +1234,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
|
||||
* This isn't strictly necessary, but it makes it harder for LSMs to
|
||||
* mess up.
|
||||
*/
|
||||
if (current->no_new_privs)
|
||||
if (task_no_new_privs(current))
|
||||
bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
|
||||
|
||||
t = p;
|
||||
@ -1272,7 +1272,7 @@ int prepare_binprm(struct linux_binprm *bprm)
|
||||
bprm->cred->egid = current_egid();
|
||||
|
||||
if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
|
||||
!current->no_new_privs &&
|
||||
!task_no_new_privs(current) &&
|
||||
kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
|
||||
kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
|
||||
/* Set-uid? */
|
||||
|
@ -1307,13 +1307,12 @@ struct task_struct {
|
||||
* execve */
|
||||
unsigned in_iowait:1;
|
||||
|
||||
/* task may not gain privileges */
|
||||
unsigned no_new_privs:1;
|
||||
|
||||
/* Revert to default priority/policy when forking */
|
||||
unsigned sched_reset_on_fork:1;
|
||||
unsigned sched_contributes_to_load:1;
|
||||
|
||||
unsigned long atomic_flags; /* Flags needing atomic access. */
|
||||
|
||||
pid_t pid;
|
||||
pid_t tgid;
|
||||
|
||||
@ -1967,6 +1966,19 @@ static inline void memalloc_noio_restore(unsigned int flags)
|
||||
current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
|
||||
}
|
||||
|
||||
/* Per-process atomic flags. */
|
||||
#define PFA_NO_NEW_PRIVS 0x00000001 /* May not gain new privileges. */
|
||||
|
||||
static inline bool task_no_new_privs(struct task_struct *p)
|
||||
{
|
||||
return test_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
|
||||
}
|
||||
|
||||
static inline void task_set_no_new_privs(struct task_struct *p)
|
||||
{
|
||||
set_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* task->jobctl flags
|
||||
*/
|
||||
|
@ -3,6 +3,8 @@
|
||||
|
||||
#include <uapi/linux/seccomp.h>
|
||||
|
||||
#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC)
|
||||
|
||||
#ifdef CONFIG_SECCOMP
|
||||
|
||||
#include <linux/thread_info.h>
|
||||
@ -14,11 +16,11 @@ struct seccomp_filter;
|
||||
*
|
||||
* @mode: indicates one of the valid values above for controlled
|
||||
* system calls available to a process.
|
||||
* @filter: The metadata and ruleset for determining what system calls
|
||||
* are allowed for a task.
|
||||
* @filter: must always point to a valid seccomp-filter or NULL as it is
|
||||
* accessed without locking during system call entry.
|
||||
*
|
||||
* @filter must only be accessed from the context of current as there
|
||||
* is no locking.
|
||||
* is no read locking.
|
||||
*/
|
||||
struct seccomp {
|
||||
int mode;
|
||||
|
@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
|
||||
asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
|
||||
unsigned long idx1, unsigned long idx2);
|
||||
asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags);
|
||||
asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
|
||||
const char __user *uargs);
|
||||
#endif
|
||||
|
@ -699,9 +699,11 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr)
|
||||
__SYSCALL(__NR_sched_getattr, sys_sched_getattr)
|
||||
#define __NR_renameat2 276
|
||||
__SYSCALL(__NR_renameat2, sys_renameat2)
|
||||
#define __NR_seccomp 277
|
||||
__SYSCALL(__NR_seccomp, sys_seccomp)
|
||||
|
||||
#undef __NR_syscalls
|
||||
#define __NR_syscalls 277
|
||||
#define __NR_syscalls 278
|
||||
|
||||
/*
|
||||
* All syscalls below here should go away really,
|
||||
|
@ -10,6 +10,13 @@
|
||||
#define SECCOMP_MODE_STRICT 1 /* uses hard-coded filter. */
|
||||
#define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */
|
||||
|
||||
/* Valid operations for seccomp syscall. */
|
||||
#define SECCOMP_SET_MODE_STRICT 0
|
||||
#define SECCOMP_SET_MODE_FILTER 1
|
||||
|
||||
/* Valid flags for SECCOMP_SET_MODE_FILTER */
|
||||
#define SECCOMP_FILTER_FLAG_TSYNC 1
|
||||
|
||||
/*
|
||||
* All BPF programs must return a 32-bit value.
|
||||
* The bottom 16-bits are for optional return data.
|
||||
|
@ -315,6 +315,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
|
||||
goto free_ti;
|
||||
|
||||
tsk->stack = ti;
|
||||
#ifdef CONFIG_SECCOMP
|
||||
/*
|
||||
* We must handle setting up seccomp filters once we're under
|
||||
* the sighand lock in case orig has changed between now and
|
||||
* then. Until then, filter must be NULL to avoid messing up
|
||||
* the usage counts on the error path calling free_task.
|
||||
*/
|
||||
tsk->seccomp.filter = NULL;
|
||||
#endif
|
||||
|
||||
setup_thread_stack(tsk, orig);
|
||||
clear_user_return_notifier(tsk);
|
||||
@ -1081,6 +1090,39 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void copy_seccomp(struct task_struct *p)
|
||||
{
|
||||
#ifdef CONFIG_SECCOMP
|
||||
/*
|
||||
* Must be called with sighand->lock held, which is common to
|
||||
* all threads in the group. Holding cred_guard_mutex is not
|
||||
* needed because this new task is not yet running and cannot
|
||||
* be racing exec.
|
||||
*/
|
||||
BUG_ON(!spin_is_locked(&current->sighand->siglock));
|
||||
|
||||
/* Ref-count the new filter user, and assign it. */
|
||||
get_seccomp_filter(current);
|
||||
p->seccomp = current->seccomp;
|
||||
|
||||
/*
|
||||
* Explicitly enable no_new_privs here in case it got set
|
||||
* between the task_struct being duplicated and holding the
|
||||
* sighand lock. The seccomp state and nnp must be in sync.
|
||||
*/
|
||||
if (task_no_new_privs(current))
|
||||
task_set_no_new_privs(p);
|
||||
|
||||
/*
|
||||
* If the parent gained a seccomp mode after copying thread
|
||||
* flags and before we held the sighand lock, we have
|
||||
* to manually enable the seccomp thread flag here.
|
||||
*/
|
||||
if (p->seccomp.mode != SECCOMP_MODE_DISABLED)
|
||||
set_tsk_thread_flag(p, TIF_SECCOMP);
|
||||
#endif
|
||||
}
|
||||
|
||||
SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
|
||||
{
|
||||
current->clear_child_tid = tidptr;
|
||||
@ -1196,7 +1238,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
|
||||
goto fork_out;
|
||||
|
||||
ftrace_graph_init_task(p);
|
||||
get_seccomp_filter(p);
|
||||
|
||||
rt_mutex_init_task(p);
|
||||
|
||||
@ -1436,6 +1477,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
|
||||
|
||||
spin_lock(&current->sighand->siglock);
|
||||
|
||||
/*
|
||||
* Copy seccomp details explicitly here, in case they were changed
|
||||
* before holding sighand lock.
|
||||
*/
|
||||
copy_seccomp(p);
|
||||
|
||||
/*
|
||||
* Process group and session signals need to be delivered to just the
|
||||
* parent before the fork or both the parent and the child after the
|
||||
|
414
kernel/seccomp.c
414
kernel/seccomp.c
@ -18,15 +18,17 @@
|
||||
#include <linux/compat.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/seccomp.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/syscalls.h>
|
||||
|
||||
/* #define SECCOMP_DEBUG 1 */
|
||||
|
||||
#ifdef CONFIG_SECCOMP_FILTER
|
||||
#include <asm/syscall.h>
|
||||
#include <linux/filter.h>
|
||||
#include <linux/pid.h>
|
||||
#include <linux/ptrace.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/tracehook.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
@ -172,21 +174,24 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
|
||||
*/
|
||||
static u32 seccomp_run_filters(int syscall)
|
||||
{
|
||||
struct seccomp_filter *f;
|
||||
struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter);
|
||||
struct seccomp_data sd;
|
||||
u32 ret = SECCOMP_RET_ALLOW;
|
||||
|
||||
/* Ensure unexpected behavior doesn't result in failing open. */
|
||||
if (WARN_ON(current->seccomp.filter == NULL))
|
||||
if (unlikely(WARN_ON(f == NULL)))
|
||||
return SECCOMP_RET_KILL;
|
||||
|
||||
/* Make sure cross-thread synced filter points somewhere sane. */
|
||||
smp_read_barrier_depends();
|
||||
|
||||
populate_seccomp_data(&sd);
|
||||
|
||||
/*
|
||||
* All filters in the list are evaluated and the lowest BPF return
|
||||
* value always takes priority (ignoring the DATA).
|
||||
*/
|
||||
for (f = current->seccomp.filter; f; f = f->prev) {
|
||||
for (; f; f = f->prev) {
|
||||
u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)&sd);
|
||||
|
||||
if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
|
||||
@ -194,29 +199,159 @@ static u32 seccomp_run_filters(int syscall)
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
#endif /* CONFIG_SECCOMP_FILTER */
|
||||
|
||||
static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
|
||||
{
|
||||
BUG_ON(!spin_is_locked(&current->sighand->siglock));
|
||||
|
||||
if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void seccomp_assign_mode(struct task_struct *task,
|
||||
unsigned long seccomp_mode)
|
||||
{
|
||||
BUG_ON(!spin_is_locked(&task->sighand->siglock));
|
||||
|
||||
task->seccomp.mode = seccomp_mode;
|
||||
/*
|
||||
* Make sure TIF_SECCOMP cannot be set before the mode (and
|
||||
* filter) is set.
|
||||
*/
|
||||
smp_mb__before_atomic();
|
||||
set_tsk_thread_flag(task, TIF_SECCOMP);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SECCOMP_FILTER
|
||||
/* Returns 1 if the parent is an ancestor of the child. */
|
||||
static int is_ancestor(struct seccomp_filter *parent,
|
||||
struct seccomp_filter *child)
|
||||
{
|
||||
/* NULL is the root ancestor. */
|
||||
if (parent == NULL)
|
||||
return 1;
|
||||
for (; child; child = child->prev)
|
||||
if (child == parent)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* seccomp_attach_filter: Attaches a seccomp filter to current.
|
||||
* seccomp_can_sync_threads: checks if all threads can be synchronized
|
||||
*
|
||||
* Expects sighand and cred_guard_mutex locks to be held.
|
||||
*
|
||||
* Returns 0 on success, -ve on error, or the pid of a thread which was
|
||||
* either not in the correct seccomp mode or it did not have an ancestral
|
||||
* seccomp filter.
|
||||
*/
|
||||
static inline pid_t seccomp_can_sync_threads(void)
|
||||
{
|
||||
struct task_struct *thread, *caller;
|
||||
|
||||
BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
|
||||
BUG_ON(!spin_is_locked(&current->sighand->siglock));
|
||||
|
||||
/* Validate all threads being eligible for synchronization. */
|
||||
caller = current;
|
||||
for_each_thread(caller, thread) {
|
||||
pid_t failed;
|
||||
|
||||
/* Skip current, since it is initiating the sync. */
|
||||
if (thread == caller)
|
||||
continue;
|
||||
|
||||
if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
|
||||
(thread->seccomp.mode == SECCOMP_MODE_FILTER &&
|
||||
is_ancestor(thread->seccomp.filter,
|
||||
caller->seccomp.filter)))
|
||||
continue;
|
||||
|
||||
/* Return the first thread that cannot be synchronized. */
|
||||
failed = task_pid_vnr(thread);
|
||||
/* If the pid cannot be resolved, then return -ESRCH */
|
||||
if (unlikely(WARN_ON(failed == 0)))
|
||||
failed = -ESRCH;
|
||||
return failed;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* seccomp_sync_threads: sets all threads to use current's filter
|
||||
*
|
||||
* Expects sighand and cred_guard_mutex locks to be held, and for
|
||||
* seccomp_can_sync_threads() to have returned success already
|
||||
* without dropping the locks.
|
||||
*
|
||||
*/
|
||||
static inline void seccomp_sync_threads(void)
|
||||
{
|
||||
struct task_struct *thread, *caller;
|
||||
|
||||
BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
|
||||
BUG_ON(!spin_is_locked(&current->sighand->siglock));
|
||||
|
||||
/* Synchronize all threads. */
|
||||
caller = current;
|
||||
for_each_thread(caller, thread) {
|
||||
/* Skip current, since it needs no changes. */
|
||||
if (thread == caller)
|
||||
continue;
|
||||
|
||||
/* Get a task reference for the new leaf node. */
|
||||
get_seccomp_filter(caller);
|
||||
/*
|
||||
* Drop the task reference to the shared ancestor since
|
||||
* current's path will hold a reference. (This also
|
||||
* allows a put before the assignment.)
|
||||
*/
|
||||
put_seccomp_filter(thread);
|
||||
smp_store_release(&thread->seccomp.filter,
|
||||
caller->seccomp.filter);
|
||||
/*
|
||||
* Opt the other thread into seccomp if needed.
|
||||
* As threads are considered to be trust-realm
|
||||
* equivalent (see ptrace_may_access), it is safe to
|
||||
* allow one thread to transition the other.
|
||||
*/
|
||||
if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
|
||||
/*
|
||||
* Don't let an unprivileged task work around
|
||||
* the no_new_privs restriction by creating
|
||||
* a thread that sets it up, enters seccomp,
|
||||
* then dies.
|
||||
*/
|
||||
if (task_no_new_privs(caller))
|
||||
task_set_no_new_privs(thread);
|
||||
|
||||
seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* seccomp_prepare_filter: Prepares a seccomp filter for use.
|
||||
* @fprog: BPF program to install
|
||||
*
|
||||
* Returns 0 on success or an errno on failure.
|
||||
* Returns filter on success or an ERR_PTR on failure.
|
||||
*/
|
||||
static long seccomp_attach_filter(struct sock_fprog *fprog)
|
||||
static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
|
||||
{
|
||||
struct seccomp_filter *filter;
|
||||
unsigned long fp_size = fprog->len * sizeof(struct sock_filter);
|
||||
unsigned long total_insns = fprog->len;
|
||||
unsigned long fp_size;
|
||||
struct sock_filter *fp;
|
||||
int new_len;
|
||||
long ret;
|
||||
|
||||
if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
|
||||
return -EINVAL;
|
||||
|
||||
for (filter = current->seccomp.filter; filter; filter = filter->prev)
|
||||
total_insns += filter->prog->len + 4; /* include a 4 instr penalty */
|
||||
if (total_insns > MAX_INSNS_PER_PATH)
|
||||
return -ENOMEM;
|
||||
return ERR_PTR(-EINVAL);
|
||||
BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));
|
||||
fp_size = fprog->len * sizeof(struct sock_filter);
|
||||
|
||||
/*
|
||||
* Installing a seccomp filter requires that the task has
|
||||
@ -224,14 +359,14 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
|
||||
* This avoids scenarios where unprivileged tasks can affect the
|
||||
* behavior of privileged children.
|
||||
*/
|
||||
if (!current->no_new_privs &&
|
||||
if (!task_no_new_privs(current) &&
|
||||
security_capable_noaudit(current_cred(), current_user_ns(),
|
||||
CAP_SYS_ADMIN) != 0)
|
||||
return -EACCES;
|
||||
return ERR_PTR(-EACCES);
|
||||
|
||||
fp = kzalloc(fp_size, GFP_KERNEL|__GFP_NOWARN);
|
||||
if (!fp)
|
||||
return -ENOMEM;
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
/* Copy the instructions from fprog. */
|
||||
ret = -EFAULT;
|
||||
@ -275,13 +410,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
|
||||
|
||||
sk_filter_select_runtime(filter->prog);
|
||||
|
||||
/*
|
||||
* If there is an existing filter, make it the prev and don't drop its
|
||||
* task reference.
|
||||
*/
|
||||
filter->prev = current->seccomp.filter;
|
||||
current->seccomp.filter = filter;
|
||||
return 0;
|
||||
return filter;
|
||||
|
||||
free_filter_prog:
|
||||
kfree(filter->prog);
|
||||
@ -289,19 +418,20 @@ free_filter:
|
||||
kfree(filter);
|
||||
free_prog:
|
||||
kfree(fp);
|
||||
return ret;
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
/**
|
||||
* seccomp_attach_user_filter - attaches a user-supplied sock_fprog
|
||||
* seccomp_prepare_user_filter - prepares a user-supplied sock_fprog
|
||||
* @user_filter: pointer to the user data containing a sock_fprog.
|
||||
*
|
||||
* Returns 0 on success and non-zero otherwise.
|
||||
*/
|
||||
static long seccomp_attach_user_filter(char __user *user_filter)
|
||||
static struct seccomp_filter *
|
||||
seccomp_prepare_user_filter(const char __user *user_filter)
|
||||
{
|
||||
struct sock_fprog fprog;
|
||||
long ret = -EFAULT;
|
||||
struct seccomp_filter *filter = ERR_PTR(-EFAULT);
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
if (is_compat_task()) {
|
||||
@ -314,9 +444,56 @@ static long seccomp_attach_user_filter(char __user *user_filter)
|
||||
#endif
|
||||
if (copy_from_user(&fprog, user_filter, sizeof(fprog)))
|
||||
goto out;
|
||||
ret = seccomp_attach_filter(&fprog);
|
||||
filter = seccomp_prepare_filter(&fprog);
|
||||
out:
|
||||
return ret;
|
||||
return filter;
|
||||
}
|
||||
|
||||
/**
|
||||
* seccomp_attach_filter: validate and attach filter
|
||||
* @flags: flags to change filter behavior
|
||||
* @filter: seccomp filter to add to the current process
|
||||
*
|
||||
* Caller must be holding current->sighand->siglock lock.
|
||||
*
|
||||
* Returns 0 on success, -ve on error.
|
||||
*/
|
||||
static long seccomp_attach_filter(unsigned int flags,
|
||||
struct seccomp_filter *filter)
|
||||
{
|
||||
unsigned long total_insns;
|
||||
struct seccomp_filter *walker;
|
||||
|
||||
BUG_ON(!spin_is_locked(&current->sighand->siglock));
|
||||
|
||||
/* Validate resulting filter length. */
|
||||
total_insns = filter->prog->len;
|
||||
for (walker = current->seccomp.filter; walker; walker = walker->prev)
|
||||
total_insns += walker->prog->len + 4; /* 4 instr penalty */
|
||||
if (total_insns > MAX_INSNS_PER_PATH)
|
||||
return -ENOMEM;
|
||||
|
||||
/* If thread sync has been requested, check that it is possible. */
|
||||
if (flags & SECCOMP_FILTER_FLAG_TSYNC) {
|
||||
int ret;
|
||||
|
||||
ret = seccomp_can_sync_threads();
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* If there is an existing filter, make it the prev and don't drop its
|
||||
* task reference.
|
||||
*/
|
||||
filter->prev = current->seccomp.filter;
|
||||
current->seccomp.filter = filter;
|
||||
|
||||
/* Now that the new filter is in place, synchronize to all threads. */
|
||||
if (flags & SECCOMP_FILTER_FLAG_TSYNC)
|
||||
seccomp_sync_threads();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* get_seccomp_filter - increments the reference count of the filter on @tsk */
|
||||
@ -329,6 +506,14 @@ void get_seccomp_filter(struct task_struct *tsk)
|
||||
atomic_inc(&orig->usage);
|
||||
}
|
||||
|
||||
static inline void seccomp_filter_free(struct seccomp_filter *filter)
|
||||
{
|
||||
if (filter) {
|
||||
sk_filter_free(filter->prog);
|
||||
kfree(filter);
|
||||
}
|
||||
}
|
||||
|
||||
/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
|
||||
void put_seccomp_filter(struct task_struct *tsk)
|
||||
{
|
||||
@ -337,8 +522,7 @@ void put_seccomp_filter(struct task_struct *tsk)
|
||||
while (orig && atomic_dec_and_test(&orig->usage)) {
|
||||
struct seccomp_filter *freeme = orig;
|
||||
orig = orig->prev;
|
||||
sk_filter_free(freeme->prog);
|
||||
kfree(freeme);
|
||||
seccomp_filter_free(freeme);
|
||||
}
|
||||
}
|
||||
|
||||
@ -382,12 +566,17 @@ static int mode1_syscalls_32[] = {
|
||||
|
||||
int __secure_computing(int this_syscall)
|
||||
{
|
||||
int mode = current->seccomp.mode;
|
||||
int exit_sig = 0;
|
||||
int *syscall;
|
||||
u32 ret;
|
||||
|
||||
switch (mode) {
|
||||
/*
|
||||
* Make sure that any changes to mode from another thread have
|
||||
* been seen after TIF_SECCOMP was seen.
|
||||
*/
|
||||
rmb();
|
||||
|
||||
switch (current->seccomp.mode) {
|
||||
case SECCOMP_MODE_STRICT:
|
||||
syscall = mode1_syscalls;
|
||||
#ifdef CONFIG_COMPAT
|
||||
@ -473,47 +662,152 @@ long prctl_get_seccomp(void)
|
||||
}
|
||||
|
||||
/**
|
||||
* prctl_set_seccomp: configures current->seccomp.mode
|
||||
* @seccomp_mode: requested mode to use
|
||||
* @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
|
||||
*
|
||||
* This function may be called repeatedly with a @seccomp_mode of
|
||||
* SECCOMP_MODE_FILTER to install additional filters. Every filter
|
||||
* successfully installed will be evaluated (in reverse order) for each system
|
||||
* call the task makes.
|
||||
* seccomp_set_mode_strict: internal function for setting strict seccomp
|
||||
*
|
||||
* Once current->seccomp.mode is non-zero, it may not be changed.
|
||||
*
|
||||
* Returns 0 on success or -EINVAL on failure.
|
||||
*/
|
||||
long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
|
||||
static long seccomp_set_mode_strict(void)
|
||||
{
|
||||
const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
|
||||
long ret = -EINVAL;
|
||||
|
||||
if (current->seccomp.mode &&
|
||||
current->seccomp.mode != seccomp_mode)
|
||||
spin_lock_irq(&current->sighand->siglock);
|
||||
|
||||
if (!seccomp_may_assign_mode(seccomp_mode))
|
||||
goto out;
|
||||
|
||||
#ifdef TIF_NOTSC
|
||||
disable_TSC();
|
||||
#endif
|
||||
seccomp_assign_mode(current, seccomp_mode);
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
spin_unlock_irq(&current->sighand->siglock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SECCOMP_FILTER
|
||||
/**
|
||||
* seccomp_set_mode_filter: internal function for setting seccomp filter
|
||||
* @flags: flags to change filter behavior
|
||||
* @filter: struct sock_fprog containing filter
|
||||
*
|
||||
* This function may be called repeatedly to install additional filters.
|
||||
* Every filter successfully installed will be evaluated (in reverse order)
|
||||
* for each system call the task makes.
|
||||
*
|
||||
* Once current->seccomp.mode is non-zero, it may not be changed.
|
||||
*
|
||||
* Returns 0 on success or -EINVAL on failure.
|
||||
*/
|
||||
static long seccomp_set_mode_filter(unsigned int flags,
|
||||
const char __user *filter)
|
||||
{
|
||||
const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
|
||||
struct seccomp_filter *prepared = NULL;
|
||||
long ret = -EINVAL;
|
||||
|
||||
/* Validate flags. */
|
||||
if (flags & ~SECCOMP_FILTER_FLAG_MASK)
|
||||
return -EINVAL;
|
||||
|
||||
/* Prepare the new filter before holding any locks. */
|
||||
prepared = seccomp_prepare_user_filter(filter);
|
||||
if (IS_ERR(prepared))
|
||||
return PTR_ERR(prepared);
|
||||
|
||||
/*
|
||||
* Make sure we cannot change seccomp or nnp state via TSYNC
|
||||
* while another thread is in the middle of calling exec.
|
||||
*/
|
||||
if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
|
||||
mutex_lock_killable(&current->signal->cred_guard_mutex))
|
||||
goto out_free;
|
||||
|
||||
spin_lock_irq(&current->sighand->siglock);
|
||||
|
||||
if (!seccomp_may_assign_mode(seccomp_mode))
|
||||
goto out;
|
||||
|
||||
ret = seccomp_attach_filter(flags, prepared);
|
||||
if (ret)
|
||||
goto out;
|
||||
/* Do not free the successfully attached filter. */
|
||||
prepared = NULL;
|
||||
|
||||
seccomp_assign_mode(current, seccomp_mode);
|
||||
out:
|
||||
spin_unlock_irq(&current->sighand->siglock);
|
||||
if (flags & SECCOMP_FILTER_FLAG_TSYNC)
|
||||
mutex_unlock(&current->signal->cred_guard_mutex);
|
||||
out_free:
|
||||
seccomp_filter_free(prepared);
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
static inline long seccomp_set_mode_filter(unsigned int flags,
|
||||
const char __user *filter)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Common entry point for both prctl and syscall. */
|
||||
static long do_seccomp(unsigned int op, unsigned int flags,
|
||||
const char __user *uargs)
|
||||
{
|
||||
switch (op) {
|
||||
case SECCOMP_SET_MODE_STRICT:
|
||||
if (flags != 0 || uargs != NULL)
|
||||
return -EINVAL;
|
||||
return seccomp_set_mode_strict();
|
||||
case SECCOMP_SET_MODE_FILTER:
|
||||
return seccomp_set_mode_filter(flags, uargs);
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
|
||||
const char __user *, uargs)
|
||||
{
|
||||
return do_seccomp(op, flags, uargs);
|
||||
}
|
||||
|
||||
/**
|
||||
* prctl_set_seccomp: configures current->seccomp.mode
|
||||
* @seccomp_mode: requested mode to use
|
||||
* @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
|
||||
*
|
||||
* Returns 0 on success or -EINVAL on failure.
|
||||
*/
|
||||
long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
|
||||
{
|
||||
unsigned int op;
|
||||
char __user *uargs;
|
||||
|
||||
switch (seccomp_mode) {
|
||||
case SECCOMP_MODE_STRICT:
|
||||
ret = 0;
|
||||
#ifdef TIF_NOTSC
|
||||
disable_TSC();
|
||||
#endif
|
||||
op = SECCOMP_SET_MODE_STRICT;
|
||||
/*
|
||||
* Setting strict mode through prctl always ignored filter,
|
||||
* so make sure it is always NULL here to pass the internal
|
||||
* check in do_seccomp().
|
||||
*/
|
||||
uargs = NULL;
|
||||
break;
|
||||
#ifdef CONFIG_SECCOMP_FILTER
|
||||
case SECCOMP_MODE_FILTER:
|
||||
ret = seccomp_attach_user_filter(filter);
|
||||
if (ret)
|
||||
goto out;
|
||||
op = SECCOMP_SET_MODE_FILTER;
|
||||
uargs = filter;
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
goto out;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
current->seccomp.mode = seccomp_mode;
|
||||
set_thread_flag(TIF_SECCOMP);
|
||||
out:
|
||||
return ret;
|
||||
/* prctl interface doesn't have flags, so they are always zero. */
|
||||
return do_seccomp(op, 0, uargs);
|
||||
}
|
||||
|
@ -1990,12 +1990,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
|
||||
if (arg2 != 1 || arg3 || arg4 || arg5)
|
||||
return -EINVAL;
|
||||
|
||||
current->no_new_privs = 1;
|
||||
task_set_no_new_privs(current);
|
||||
break;
|
||||
case PR_GET_NO_NEW_PRIVS:
|
||||
if (arg2 || arg3 || arg4 || arg5)
|
||||
return -EINVAL;
|
||||
return current->no_new_privs ? 1 : 0;
|
||||
return task_no_new_privs(current) ? 1 : 0;
|
||||
case PR_GET_THP_DISABLE:
|
||||
if (arg2 || arg3 || arg4 || arg5)
|
||||
return -EINVAL;
|
||||
|
@ -213,3 +213,6 @@ cond_syscall(compat_sys_open_by_handle_at);
|
||||
|
||||
/* compare kernel pointers */
|
||||
cond_syscall(sys_kcmp);
|
||||
|
||||
/* operate on Secure Computing state */
|
||||
cond_syscall(sys_seccomp);
|
||||
|
@ -621,7 +621,7 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest)
|
||||
* There is no exception for unconfined as change_hat is not
|
||||
* available.
|
||||
*/
|
||||
if (current->no_new_privs)
|
||||
if (task_no_new_privs(current))
|
||||
return -EPERM;
|
||||
|
||||
/* released below */
|
||||
@ -776,7 +776,7 @@ int aa_change_profile(const char *ns_name, const char *hname, bool onexec,
|
||||
* no_new_privs is set because this always results in a reduction
|
||||
* of permissions.
|
||||
*/
|
||||
if (current->no_new_privs && !unconfined(profile)) {
|
||||
if (task_no_new_privs(current) && !unconfined(profile)) {
|
||||
put_cred(cred);
|
||||
return -EPERM;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user