forked from Minki/linux
seccomp updates for v5.9-rc1
- Improved selftest coverage, timeouts, and reporting - Add EPOLLHUP support for SECCOMP_RET_USER_NOTIF (Christian Brauner) - Refactor __scm_install_fd() into __receive_fd() and fix buggy callers - Introduce "addfd" command for SECCOMP_RET_USER_NOTIF (Sargun Dhillon) -----BEGIN PGP SIGNATURE----- iQJKBAABCgA0FiEEpcP2jyKd1g9yPm4TiXL039xtwCYFAl8oZcQWHGtlZXNjb29r QGNocm9taXVtLm9yZwAKCRCJcvTf3G3AJomDD/4x3j7eXREcXDsHOmlgEaHWGx4l JldHFQhV5GjmD7gOkPcoZSG7NfG7F6VpwAJg7ZoR3qUkem7K8DFucxqgo1RldCot nigleeLX6JeMS0Z+iwjAVZd+5t4xG4J/7GGDHIIMiG5qvwJ0Yf64o1bkjaB2Q/Bv tluBg0WF32kFMG/ZwyY/V2QDbbue97CFPflybOh1o2nWbVzmUlFEEum3UUvZsxc8 smMsattJyuAV7kcEKzKrs8b010NdFZqwdbub5Np9W3XEXGBYMdIPoNsOQGmB9wby j2ui0lzboXRG997jM7TCd1l/XZAv8aAwvPplw3FJRybzkOGs9NDyLMoz87yJpR1T xp511vnMyMbyKIGdungkt7cIyzaictHwaYzznsmuNdCPEjTaIQJr1ctsa4GEgtqf pnkktZ9YbMCcHU0CtZ8GlOVqA9wE+FUm0/u0zgikzJQsB+HcNItiARTTTHRyco7p VJCqK8o4Zx4ELV7QNkSH4nhFkVgRopvrvBiPAGro/qwGOofBg8W8wM8O1+V/MDmp zSU22v4SncT1Xb7dtmdJqDEeHfDikhaCAb4Je2hsGQWzbdAqwHGlpa7vpk9x3Q5r L+XyP+Z+rPHlXYyypJwUvvOQhXOmP0zYxcEHxByqIBfXiwy+3dN4tDDfatWbccwl uTlTDM8kmQn6QzSztA== =yb55 -----END PGP SIGNATURE----- Merge tag 'seccomp-v5.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux Pull seccomp updates from Kees Cook: "There are a bunch of clean ups and selftest improvements along with two major updates to the SECCOMP_RET_USER_NOTIF filter return: EPOLLHUP support to more easily detect the death of a monitored process, and being able to inject fds when intercepting syscalls that expect an fd-opening side-effect (needed by both container folks and Chrome). The latter continued the refactoring of __scm_install_fd() started by Christoph, and in the process found and fixed a handful of bugs in various callers. 
- Improved selftest coverage, timeouts, and reporting - Add EPOLLHUP support for SECCOMP_RET_USER_NOTIF (Christian Brauner) - Refactor __scm_install_fd() into __receive_fd() and fix buggy callers - Introduce 'addfd' command for SECCOMP_RET_USER_NOTIF (Sargun Dhillon)" * tag 'seccomp-v5.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux: (30 commits) selftests/seccomp: Test SECCOMP_IOCTL_NOTIF_ADDFD seccomp: Introduce addfd ioctl to seccomp user notifier fs: Expand __receive_fd() to accept existing fd pidfd: Replace open-coded receive_fd() fs: Add receive_fd() wrapper for __receive_fd() fs: Move __scm_install_fd() to __receive_fd() net/scm: Regularize compat handling of scm_detach_fds() pidfd: Add missing sock updates for pidfd_getfd() net/compat: Add missing sock updates for SCM_RIGHTS selftests/seccomp: Check ENOSYS under tracing selftests/seccomp: Refactor to use fixture variants selftests/harness: Clean up kern-doc for fixtures seccomp: Use -1 marker for end of mode 1 syscall list seccomp: Fix ioctl number for SECCOMP_IOCTL_NOTIF_ID_VALID selftests/seccomp: Rename user_trap_syscall() to user_notif_syscall() selftests/seccomp: Make kcmp() less required seccomp: Use pr_fmt selftests/seccomp: Improve calibration loop selftests/seccomp: use 90s as timeout selftests/seccomp: Expand benchmark to per-filter measurements ...
This commit is contained in:
commit
9ecc6ea491
@ -9,12 +9,12 @@ static inline const int *get_compat_mode1_syscalls(void)
|
||||
static const int syscalls_O32[] = {
|
||||
__NR_O32_Linux + 3, __NR_O32_Linux + 4,
|
||||
__NR_O32_Linux + 1, __NR_O32_Linux + 193,
|
||||
0, /* null terminated */
|
||||
-1, /* negative terminated */
|
||||
};
|
||||
static const int syscalls_N32[] = {
|
||||
__NR_N32_Linux + 0, __NR_N32_Linux + 1,
|
||||
__NR_N32_Linux + 58, __NR_N32_Linux + 211,
|
||||
0, /* null terminated */
|
||||
-1, /* negative terminated */
|
||||
};
|
||||
|
||||
if (IS_ENABLED(CONFIG_MIPS32_O32) && test_thread_flag(TIF_32BIT_REGS))
|
||||
|
61
fs/file.c
61
fs/file.c
@ -18,6 +18,7 @@
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <net/sock.h>
|
||||
|
||||
unsigned int sysctl_nr_open __read_mostly = 1024*1024;
|
||||
unsigned int sysctl_nr_open_min = BITS_PER_LONG;
|
||||
@ -613,6 +614,10 @@ void __fd_install(struct files_struct *files, unsigned int fd,
|
||||
rcu_read_unlock_sched();
|
||||
}
|
||||
|
||||
/*
|
||||
* This consumes the "file" refcount, so callers should treat it
|
||||
* as if they had called fput(file).
|
||||
*/
|
||||
void fd_install(unsigned int fd, struct file *file)
|
||||
{
|
||||
__fd_install(current->files, fd, file);
|
||||
@ -931,6 +936,62 @@ out_unlock:
|
||||
return err;
|
||||
}
|
||||
|
||||
/**
|
||||
* __receive_fd() - Install received file into file descriptor table
|
||||
*
|
||||
* @fd: fd to install into (if negative, a new fd will be allocated)
|
||||
* @file: struct file that was received from another process
|
||||
* @ufd: __user pointer to write new fd number to
|
||||
* @o_flags: the O_* flags to apply to the new fd entry
|
||||
*
|
||||
* Installs a received file into the file descriptor table, with appropriate
|
||||
* checks and count updates. Optionally writes the fd number to userspace, if
|
||||
* @ufd is non-NULL.
|
||||
*
|
||||
* This helper handles its own reference counting of the incoming
|
||||
* struct file.
|
||||
*
|
||||
* Returns newly install fd or -ve on error.
|
||||
*/
|
||||
int __receive_fd(int fd, struct file *file, int __user *ufd, unsigned int o_flags)
|
||||
{
|
||||
int new_fd;
|
||||
int error;
|
||||
|
||||
error = security_file_receive(file);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
if (fd < 0) {
|
||||
new_fd = get_unused_fd_flags(o_flags);
|
||||
if (new_fd < 0)
|
||||
return new_fd;
|
||||
} else {
|
||||
new_fd = fd;
|
||||
}
|
||||
|
||||
if (ufd) {
|
||||
error = put_user(new_fd, ufd);
|
||||
if (error) {
|
||||
if (fd < 0)
|
||||
put_unused_fd(new_fd);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
if (fd < 0) {
|
||||
fd_install(new_fd, get_file(file));
|
||||
} else {
|
||||
error = replace_fd(new_fd, file, o_flags);
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
|
||||
/* Bump the sock usage counts, if any. */
|
||||
__receive_sock(file);
|
||||
return new_fd;
|
||||
}
|
||||
|
||||
static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
|
||||
{
|
||||
int err = -EBADF;
|
||||
|
@ -341,6 +341,8 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
|
||||
seq_put_decimal_ull(m, "NoNewPrivs:\t", task_no_new_privs(p));
|
||||
#ifdef CONFIG_SECCOMP
|
||||
seq_put_decimal_ull(m, "\nSeccomp:\t", p->seccomp.mode);
|
||||
seq_put_decimal_ull(m, "\nSeccomp_filters:\t",
|
||||
atomic_read(&p->seccomp.filter_count));
|
||||
#endif
|
||||
seq_puts(m, "\nSpeculation_Store_Bypass:\t");
|
||||
switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) {
|
||||
|
@ -33,7 +33,7 @@ static inline const int *get_compat_mode1_syscalls(void)
|
||||
static const int mode1_syscalls_32[] = {
|
||||
__NR_seccomp_read_32, __NR_seccomp_write_32,
|
||||
__NR_seccomp_exit_32, __NR_seccomp_sigreturn_32,
|
||||
0, /* null terminated */
|
||||
-1, /* negative terminated */
|
||||
};
|
||||
return mode1_syscalls_32;
|
||||
}
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/posix_types.h>
|
||||
#include <linux/errno.h>
|
||||
|
||||
struct file;
|
||||
|
||||
@ -91,6 +92,24 @@ extern void put_unused_fd(unsigned int fd);
|
||||
|
||||
extern void fd_install(unsigned int fd, struct file *file);
|
||||
|
||||
extern int __receive_fd(int fd, struct file *file, int __user *ufd,
|
||||
unsigned int o_flags);
|
||||
/*
 * Install @file at a newly allocated fd and write that fd number to the
 * userspace location @ufd. A NULL @ufd is refused with -EFAULT; use
 * receive_fd() when no userspace report is wanted.
 */
static inline int receive_fd_user(struct file *file, int __user *ufd,
				  unsigned int o_flags)
{
	if (!ufd)
		return -EFAULT;

	/* fd == -1 asks __receive_fd() to allocate the descriptor. */
	return __receive_fd(-1, file, ufd, o_flags);
}
|
||||
/*
 * Install @file at a newly allocated fd carrying @o_flags, without
 * reporting the fd number to userspace (no @ufd is passed along).
 */
static inline int receive_fd(struct file *file, unsigned int o_flags)
{
	/* fd == -1: allocate; ufd == NULL: nothing to copy out. */
	return __receive_fd(-1, file, NULL, o_flags);
}
|
||||
/*
 * Install @file at the caller-chosen descriptor @fd with @o_flags, without
 * reporting the fd number to userspace. A negative @fd is passed through
 * unchanged, which __receive_fd() treats as "allocate a new fd".
 */
static inline int receive_fd_replace(int fd, struct file *file,
				     unsigned int o_flags)
{
	return __receive_fd(fd, file, NULL, o_flags);
}
|
||||
|
||||
extern void flush_delayed_fput(void);
|
||||
extern void __fput_sync(struct file *);
|
||||
|
||||
|
@ -10,9 +10,14 @@
|
||||
SECCOMP_FILTER_FLAG_NEW_LISTENER | \
|
||||
SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
|
||||
|
||||
/* sizeof() the first published struct seccomp_notif_addfd */
|
||||
#define SECCOMP_NOTIFY_ADDFD_SIZE_VER0 24
|
||||
#define SECCOMP_NOTIFY_ADDFD_SIZE_LATEST SECCOMP_NOTIFY_ADDFD_SIZE_VER0
|
||||
|
||||
#ifdef CONFIG_SECCOMP
|
||||
|
||||
#include <linux/thread_info.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <asm/seccomp.h>
|
||||
|
||||
struct seccomp_filter;
|
||||
@ -29,6 +34,7 @@ struct seccomp_filter;
|
||||
*/
|
||||
struct seccomp {
|
||||
int mode;
|
||||
atomic_t filter_count;
|
||||
struct seccomp_filter *filter;
|
||||
};
|
||||
|
||||
@ -82,10 +88,10 @@ static inline int seccomp_mode(struct seccomp *s)
|
||||
#endif /* CONFIG_SECCOMP */
|
||||
|
||||
#ifdef CONFIG_SECCOMP_FILTER
|
||||
extern void put_seccomp_filter(struct task_struct *tsk);
|
||||
extern void seccomp_filter_release(struct task_struct *tsk);
|
||||
extern void get_seccomp_filter(struct task_struct *tsk);
|
||||
#else /* CONFIG_SECCOMP_FILTER */
|
||||
static inline void put_seccomp_filter(struct task_struct *tsk)
|
||||
static inline void seccomp_filter_release(struct task_struct *tsk)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
@ -891,6 +891,8 @@ static inline int sk_memalloc_socks(void)
|
||||
{
|
||||
return static_branch_unlikely(&memalloc_socks_key);
|
||||
}
|
||||
|
||||
void __receive_sock(struct file *file);
|
||||
#else
|
||||
|
||||
static inline int sk_memalloc_socks(void)
|
||||
@ -898,6 +900,8 @@ static inline int sk_memalloc_socks(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Empty stand-in used on the #else side of the surrounding conditional,
 * so callers can invoke __receive_sock() unconditionally.
 * NOTE(review): the controlling #if is outside this hunk - confirm which
 * config option selects this stub.
 */
static inline void __receive_sock(struct file *file)
{
}
|
||||
#endif
|
||||
|
||||
static inline gfp_t sk_gfp_mask(const struct sock *sk, gfp_t gfp_mask)
|
||||
|
@ -113,6 +113,25 @@ struct seccomp_notif_resp {
|
||||
__u32 flags;
|
||||
};
|
||||
|
||||
/* valid flags for seccomp_notif_addfd */
|
||||
#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */
|
||||
|
||||
/**
|
||||
* struct seccomp_notif_addfd
|
||||
* @id: The ID of the seccomp notification
|
||||
* @flags: SECCOMP_ADDFD_FLAG_*
|
||||
* @srcfd: The local fd number
|
||||
* @newfd: Optional remote FD number if SETFD option is set, otherwise 0.
|
||||
* @newfd_flags: The O_* flags the remote FD should have applied
|
||||
*/
|
||||
struct seccomp_notif_addfd {
|
||||
__u64 id;
|
||||
__u32 flags;
|
||||
__u32 srcfd;
|
||||
__u32 newfd;
|
||||
__u32 newfd_flags;
|
||||
};
|
||||
|
||||
#define SECCOMP_IOC_MAGIC '!'
|
||||
#define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr)
|
||||
#define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type)
|
||||
@ -123,5 +142,9 @@ struct seccomp_notif_resp {
|
||||
#define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif)
|
||||
#define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \
|
||||
struct seccomp_notif_resp)
|
||||
#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOR(2, __u64)
|
||||
#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64)
|
||||
/* On success, the return value is the remote process's added fd number */
|
||||
#define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \
|
||||
struct seccomp_notif_addfd)
|
||||
|
||||
#endif /* _UAPI_LINUX_SECCOMP_H */
|
||||
|
@ -204,6 +204,9 @@ struct task_struct init_task
|
||||
#ifdef CONFIG_SECURITY
|
||||
.security = NULL,
|
||||
#endif
|
||||
#ifdef CONFIG_SECCOMP
|
||||
.seccomp = { .filter_count = ATOMIC_INIT(0) },
|
||||
#endif
|
||||
};
|
||||
EXPORT_SYMBOL(init_task);
|
||||
|
||||
|
@ -217,6 +217,7 @@ repeat:
|
||||
}
|
||||
|
||||
write_unlock_irq(&tasklist_lock);
|
||||
seccomp_filter_release(p);
|
||||
proc_flush_pid(thread_pid);
|
||||
put_pid(thread_pid);
|
||||
release_thread(p);
|
||||
|
@ -479,7 +479,6 @@ void free_task(struct task_struct *tsk)
|
||||
#endif
|
||||
rt_mutex_debug_task_free(tsk);
|
||||
ftrace_graph_exit_task(tsk);
|
||||
put_seccomp_filter(tsk);
|
||||
arch_release_task_struct(tsk);
|
||||
if (tsk->flags & PF_KTHREAD)
|
||||
free_kthread_struct(tsk);
|
||||
|
14
kernel/pid.c
14
kernel/pid.c
@ -42,6 +42,7 @@
|
||||
#include <linux/sched/signal.h>
|
||||
#include <linux/sched/task.h>
|
||||
#include <linux/idr.h>
|
||||
#include <net/sock.h>
|
||||
|
||||
struct pid init_struct_pid = {
|
||||
.count = REFCOUNT_INIT(1),
|
||||
@ -635,17 +636,8 @@ static int pidfd_getfd(struct pid *pid, int fd)
|
||||
if (IS_ERR(file))
|
||||
return PTR_ERR(file);
|
||||
|
||||
ret = security_file_receive(file);
|
||||
if (ret) {
|
||||
fput(file);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = get_unused_fd_flags(O_CLOEXEC);
|
||||
if (ret < 0)
|
||||
fput(file);
|
||||
else
|
||||
fd_install(ret, file);
|
||||
ret = receive_fd(file, O_CLOEXEC);
|
||||
fput(file);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
376
kernel/seccomp.c
376
kernel/seccomp.c
@ -13,6 +13,7 @@
|
||||
* Mode 2 allows user-defined system call filters in the form
|
||||
* of Berkeley Packet Filters/Linux Socket Filters.
|
||||
*/
|
||||
#define pr_fmt(fmt) "seccomp: " fmt
|
||||
|
||||
#include <linux/refcount.h>
|
||||
#include <linux/audit.h>
|
||||
@ -41,6 +42,15 @@
|
||||
#include <linux/tracehook.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/anon_inodes.h>
|
||||
#include <linux/lockdep.h>
|
||||
|
||||
/*
|
||||
* When SECCOMP_IOCTL_NOTIF_ID_VALID was first introduced, it had the
|
||||
* wrong direction flag in the ioctl number. This is the broken one,
|
||||
* which the kernel needs to keep supporting until all userspaces stop
|
||||
* using the wrong command number.
|
||||
*/
|
||||
#define SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR SECCOMP_IOR(2, __u64)
|
||||
|
||||
enum notify_state {
|
||||
SECCOMP_NOTIFY_INIT,
|
||||
@ -77,10 +87,42 @@ struct seccomp_knotif {
|
||||
long val;
|
||||
u32 flags;
|
||||
|
||||
/* Signals when this has entered SECCOMP_NOTIFY_REPLIED */
|
||||
/*
|
||||
* Signals when this has changed states, such as the listener
|
||||
* dying, a new seccomp addfd message, or changing to REPLIED
|
||||
*/
|
||||
struct completion ready;
|
||||
|
||||
struct list_head list;
|
||||
|
||||
/* outstanding addfd requests */
|
||||
struct list_head addfd;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct seccomp_kaddfd - container for seccomp_addfd ioctl messages
|
||||
*
|
||||
* @file: A reference to the file to install in the other task
|
||||
* @fd: The fd number to install it at. If the fd number is -1, it means the
|
||||
* installing process should allocate the fd as normal.
|
||||
* @flags: The flags for the new file descriptor. At the moment, only O_CLOEXEC
|
||||
* is allowed.
|
||||
* @ret: The return value of the installing process. It is set to the fd num
|
||||
* upon success (>= 0).
|
||||
* @completion: Indicates that the installing process has completed fd
|
||||
* installation, or gone away (either due to successful
|
||||
* reply, or signal)
|
||||
*
|
||||
*/
|
||||
struct seccomp_kaddfd {
|
||||
struct file *file;
|
||||
int fd;
|
||||
unsigned int flags;
|
||||
|
||||
/* To only be set on reply */
|
||||
int ret;
|
||||
struct completion completion;
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -94,27 +136,35 @@ struct seccomp_knotif {
|
||||
* filter->notify_lock.
|
||||
* @next_id: The id of the next request.
|
||||
* @notifications: A list of struct seccomp_knotif elements.
|
||||
* @wqh: A wait queue for poll.
|
||||
*/
|
||||
struct notification {
|
||||
struct semaphore request;
|
||||
u64 next_id;
|
||||
struct list_head notifications;
|
||||
wait_queue_head_t wqh;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct seccomp_filter - container for seccomp BPF programs
|
||||
*
|
||||
* @usage: reference count to manage the object lifetime.
|
||||
* get/put helpers should be used when accessing an instance
|
||||
* outside of a lifetime-guarded section. In general, this
|
||||
* is only needed for handling filters shared across tasks.
|
||||
* @refs: Reference count to manage the object lifetime.
|
||||
* A filter's reference count is incremented for each directly
|
||||
* attached task, once for the dependent filter, and if
|
||||
* requested for the user notifier. When @refs reaches zero,
|
||||
* the filter can be freed.
|
||||
* @users: A filter's @users count is incremented for each directly
|
||||
* attached task (filter installation, fork(), thread_sync),
|
||||
* and once for the dependent filter (tracked in filter->prev).
|
||||
* When it reaches zero it indicates that no direct or indirect
|
||||
* users of that filter exist. No new tasks can get associated with
|
||||
* this filter after reaching 0. The @users count is always smaller
|
||||
* or equal to @refs. Hence, reaching 0 for @users does not mean
|
||||
* the filter can be freed.
|
||||
* @log: true if all actions except for SECCOMP_RET_ALLOW should be logged
|
||||
* @prev: points to a previously installed, or inherited, filter
|
||||
* @prog: the BPF program to evaluate
|
||||
* @notif: the struct that holds all notification related information
|
||||
* @notify_lock: A lock for all notification-related accesses.
|
||||
* @wqh: A wait queue for poll if a notifier is in use.
|
||||
*
|
||||
* seccomp_filter objects are organized in a tree linked via the @prev
|
||||
* pointer. For any task, it appears to be a singly-linked list starting
|
||||
@ -124,15 +174,17 @@ struct notification {
|
||||
* how namespaces work.
|
||||
*
|
||||
* seccomp_filter objects should never be modified after being attached
|
||||
* to a task_struct (other than @usage).
|
||||
* to a task_struct (other than @refs).
|
||||
*/
|
||||
struct seccomp_filter {
|
||||
refcount_t usage;
|
||||
refcount_t refs;
|
||||
refcount_t users;
|
||||
bool log;
|
||||
struct seccomp_filter *prev;
|
||||
struct bpf_prog *prog;
|
||||
struct notification *notif;
|
||||
struct mutex notify_lock;
|
||||
wait_queue_head_t wqh;
|
||||
};
|
||||
|
||||
/* Limit any path through the tree to 256KB worth of instructions. */
|
||||
@ -366,6 +418,59 @@ static inline pid_t seccomp_can_sync_threads(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void seccomp_filter_free(struct seccomp_filter *filter)
|
||||
{
|
||||
if (filter) {
|
||||
bpf_prog_destroy(filter->prog);
|
||||
kfree(filter);
|
||||
}
|
||||
}
|
||||
|
||||
static void __seccomp_filter_orphan(struct seccomp_filter *orig)
|
||||
{
|
||||
while (orig && refcount_dec_and_test(&orig->users)) {
|
||||
if (waitqueue_active(&orig->wqh))
|
||||
wake_up_poll(&orig->wqh, EPOLLHUP);
|
||||
orig = orig->prev;
|
||||
}
|
||||
}
|
||||
|
||||
static void __put_seccomp_filter(struct seccomp_filter *orig)
|
||||
{
|
||||
/* Clean up single-reference branches iteratively. */
|
||||
while (orig && refcount_dec_and_test(&orig->refs)) {
|
||||
struct seccomp_filter *freeme = orig;
|
||||
orig = orig->prev;
|
||||
seccomp_filter_free(freeme);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Release a task's hold on the filter tree rooted at @orig.
 *
 * The order matters: the @users pass wakes pollers of filters that just
 * became unused while the filters still exist; only afterwards is the
 * @refs pass allowed to free them.
 */
static void __seccomp_filter_release(struct seccomp_filter *orig)
{
	/* Step 1: signal EPOLLHUP for filters no task uses any more. */
	__seccomp_filter_orphan(orig);

	/* Step 2: give up the references to the former tree. */
	__put_seccomp_filter(orig);
}
|
||||
|
||||
/**
|
||||
* seccomp_filter_release - Detach the task from its filter tree,
|
||||
* drop its reference count, and notify
|
||||
* about unused filters
|
||||
*
|
||||
* This function should only be called when the task is exiting as
|
||||
* it detaches it from its filter tree. As such, READ_ONCE() and
|
||||
* barriers are not needed here, as would normally be needed.
|
||||
*/
|
||||
void seccomp_filter_release(struct task_struct *tsk)
|
||||
{
|
||||
struct seccomp_filter *orig = tsk->seccomp.filter;
|
||||
|
||||
/* Detach task from its filter tree. */
|
||||
tsk->seccomp.filter = NULL;
|
||||
__seccomp_filter_release(orig);
|
||||
}
|
||||
|
||||
/**
|
||||
* seccomp_sync_threads: sets all threads to use current's filter
|
||||
*
|
||||
@ -390,14 +495,19 @@ static inline void seccomp_sync_threads(unsigned long flags)
|
||||
|
||||
/* Get a task reference for the new leaf node. */
|
||||
get_seccomp_filter(caller);
|
||||
|
||||
/*
|
||||
* Drop the task reference to the shared ancestor since
|
||||
* current's path will hold a reference. (This also
|
||||
* allows a put before the assignment.)
|
||||
*/
|
||||
put_seccomp_filter(thread);
|
||||
__seccomp_filter_release(thread->seccomp.filter);
|
||||
|
||||
/* Make our new filter tree visible. */
|
||||
smp_store_release(&thread->seccomp.filter,
|
||||
caller->seccomp.filter);
|
||||
atomic_set(&thread->seccomp.filter_count,
|
||||
atomic_read(&thread->seccomp.filter_count));
|
||||
|
||||
/*
|
||||
* Don't let an unprivileged task work around
|
||||
@ -461,7 +571,9 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
refcount_set(&sfilter->usage, 1);
|
||||
refcount_set(&sfilter->refs, 1);
|
||||
refcount_set(&sfilter->users, 1);
|
||||
init_waitqueue_head(&sfilter->wqh);
|
||||
|
||||
return sfilter;
|
||||
}
|
||||
@ -544,6 +656,7 @@ static long seccomp_attach_filter(unsigned int flags,
|
||||
*/
|
||||
filter->prev = current->seccomp.filter;
|
||||
current->seccomp.filter = filter;
|
||||
atomic_inc(¤t->seccomp.filter_count);
|
||||
|
||||
/* Now that the new filter is in place, synchronize to all threads. */
|
||||
if (flags & SECCOMP_FILTER_FLAG_TSYNC)
|
||||
@ -554,7 +667,7 @@ static long seccomp_attach_filter(unsigned int flags,
|
||||
|
||||
static void __get_seccomp_filter(struct seccomp_filter *filter)
|
||||
{
|
||||
refcount_inc(&filter->usage);
|
||||
refcount_inc(&filter->refs);
|
||||
}
|
||||
|
||||
/* get_seccomp_filter - increments the reference count of the filter on @tsk */
|
||||
@ -564,30 +677,7 @@ void get_seccomp_filter(struct task_struct *tsk)
|
||||
if (!orig)
|
||||
return;
|
||||
__get_seccomp_filter(orig);
|
||||
}
|
||||
|
||||
static inline void seccomp_filter_free(struct seccomp_filter *filter)
|
||||
{
|
||||
if (filter) {
|
||||
bpf_prog_destroy(filter->prog);
|
||||
kfree(filter);
|
||||
}
|
||||
}
|
||||
|
||||
static void __put_seccomp_filter(struct seccomp_filter *orig)
|
||||
{
|
||||
/* Clean up single-reference branches iteratively. */
|
||||
while (orig && refcount_dec_and_test(&orig->usage)) {
|
||||
struct seccomp_filter *freeme = orig;
|
||||
orig = orig->prev;
|
||||
seccomp_filter_free(freeme);
|
||||
}
|
||||
}
|
||||
|
||||
/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
|
||||
void put_seccomp_filter(struct task_struct *tsk)
|
||||
{
|
||||
__put_seccomp_filter(tsk->seccomp.filter);
|
||||
refcount_inc(&orig->users);
|
||||
}
|
||||
|
||||
static void seccomp_init_siginfo(kernel_siginfo_t *info, int syscall, int reason)
|
||||
@ -684,20 +774,20 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
|
||||
*/
|
||||
static const int mode1_syscalls[] = {
|
||||
__NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
|
||||
0, /* null terminated */
|
||||
-1, /* negative terminated */
|
||||
};
|
||||
|
||||
static void __secure_computing_strict(int this_syscall)
|
||||
{
|
||||
const int *syscall_whitelist = mode1_syscalls;
|
||||
const int *allowed_syscalls = mode1_syscalls;
|
||||
#ifdef CONFIG_COMPAT
|
||||
if (in_compat_syscall())
|
||||
syscall_whitelist = get_compat_mode1_syscalls();
|
||||
allowed_syscalls = get_compat_mode1_syscalls();
|
||||
#endif
|
||||
do {
|
||||
if (*syscall_whitelist == this_syscall)
|
||||
if (*allowed_syscalls == this_syscall)
|
||||
return;
|
||||
} while (*++syscall_whitelist);
|
||||
} while (*++allowed_syscalls != -1);
|
||||
|
||||
#ifdef SECCOMP_DEBUG
|
||||
dump_stack();
|
||||
@ -735,6 +825,17 @@ static u64 seccomp_next_notify_id(struct seccomp_filter *filter)
|
||||
return filter->notif->next_id++;
|
||||
}
|
||||
|
||||
static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd)
|
||||
{
|
||||
/*
|
||||
* Remove the notification, and reset the list pointers, indicating
|
||||
* that it has been handled.
|
||||
*/
|
||||
list_del_init(&addfd->list);
|
||||
addfd->ret = receive_fd_replace(addfd->fd, addfd->file, addfd->flags);
|
||||
complete(&addfd->completion);
|
||||
}
|
||||
|
||||
static int seccomp_do_user_notification(int this_syscall,
|
||||
struct seccomp_filter *match,
|
||||
const struct seccomp_data *sd)
|
||||
@ -743,6 +844,7 @@ static int seccomp_do_user_notification(int this_syscall,
|
||||
u32 flags = 0;
|
||||
long ret = 0;
|
||||
struct seccomp_knotif n = {};
|
||||
struct seccomp_kaddfd *addfd, *tmp;
|
||||
|
||||
mutex_lock(&match->notify_lock);
|
||||
err = -ENOSYS;
|
||||
@ -755,25 +857,43 @@ static int seccomp_do_user_notification(int this_syscall,
|
||||
n.id = seccomp_next_notify_id(match);
|
||||
init_completion(&n.ready);
|
||||
list_add(&n.list, &match->notif->notifications);
|
||||
INIT_LIST_HEAD(&n.addfd);
|
||||
|
||||
up(&match->notif->request);
|
||||
wake_up_poll(&match->notif->wqh, EPOLLIN | EPOLLRDNORM);
|
||||
wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);
|
||||
mutex_unlock(&match->notify_lock);
|
||||
|
||||
/*
|
||||
* This is where we wait for a reply from userspace.
|
||||
*/
|
||||
wait:
|
||||
err = wait_for_completion_interruptible(&n.ready);
|
||||
mutex_lock(&match->notify_lock);
|
||||
if (err == 0) {
|
||||
/* Check if we were woken up by a addfd message */
|
||||
addfd = list_first_entry_or_null(&n.addfd,
|
||||
struct seccomp_kaddfd, list);
|
||||
if (addfd && n.state != SECCOMP_NOTIFY_REPLIED) {
|
||||
seccomp_handle_addfd(addfd);
|
||||
mutex_unlock(&match->notify_lock);
|
||||
goto wait;
|
||||
}
|
||||
ret = n.val;
|
||||
err = n.error;
|
||||
flags = n.flags;
|
||||
}
|
||||
|
||||
/* If there were any pending addfd calls, clear them out */
|
||||
list_for_each_entry_safe(addfd, tmp, &n.addfd, list) {
|
||||
/* The process went away before we got a chance to handle it */
|
||||
addfd->ret = -ESRCH;
|
||||
list_del_init(&addfd->list);
|
||||
complete(&addfd->completion);
|
||||
}
|
||||
|
||||
/*
|
||||
* Note that it's possible the listener died in between the time when
|
||||
* we were notified of a respons (or a signal) and when we were able to
|
||||
* we were notified of a response (or a signal) and when we were able to
|
||||
* re-acquire the lock, so only delete from the list if the
|
||||
* notification actually exists.
|
||||
*
|
||||
@ -1011,6 +1131,11 @@ static int seccomp_notify_release(struct inode *inode, struct file *file)
|
||||
knotif->error = -ENOSYS;
|
||||
knotif->val = 0;
|
||||
|
||||
/*
|
||||
* We do not need to wake up any pending addfd messages, as
|
||||
* the notifier will do that for us, as this just looks
|
||||
* like a standard reply.
|
||||
*/
|
||||
complete(&knotif->ready);
|
||||
}
|
||||
|
||||
@ -1021,6 +1146,23 @@ static int seccomp_notify_release(struct inode *inode, struct file *file)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* must be called with notif_lock held */
|
||||
static inline struct seccomp_knotif *
|
||||
find_notification(struct seccomp_filter *filter, u64 id)
|
||||
{
|
||||
struct seccomp_knotif *cur;
|
||||
|
||||
lockdep_assert_held(&filter->notify_lock);
|
||||
|
||||
list_for_each_entry(cur, &filter->notif->notifications, list) {
|
||||
if (cur->id == id)
|
||||
return cur;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
static long seccomp_notify_recv(struct seccomp_filter *filter,
|
||||
void __user *buf)
|
||||
{
|
||||
@ -1064,7 +1206,7 @@ static long seccomp_notify_recv(struct seccomp_filter *filter,
|
||||
unotif.data = *(knotif->data);
|
||||
|
||||
knotif->state = SECCOMP_NOTIFY_SENT;
|
||||
wake_up_poll(&filter->notif->wqh, EPOLLOUT | EPOLLWRNORM);
|
||||
wake_up_poll(&filter->wqh, EPOLLOUT | EPOLLWRNORM);
|
||||
ret = 0;
|
||||
out:
|
||||
mutex_unlock(&filter->notify_lock);
|
||||
@ -1078,15 +1220,8 @@ out:
|
||||
* may have died when we released the lock, so we need to make
|
||||
* sure it's still around.
|
||||
*/
|
||||
knotif = NULL;
|
||||
mutex_lock(&filter->notify_lock);
|
||||
list_for_each_entry(cur, &filter->notif->notifications, list) {
|
||||
if (cur->id == unotif.id) {
|
||||
knotif = cur;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
knotif = find_notification(filter, unotif.id);
|
||||
if (knotif) {
|
||||
knotif->state = SECCOMP_NOTIFY_INIT;
|
||||
up(&filter->notif->request);
|
||||
@ -1101,7 +1236,7 @@ static long seccomp_notify_send(struct seccomp_filter *filter,
|
||||
void __user *buf)
|
||||
{
|
||||
struct seccomp_notif_resp resp = {};
|
||||
struct seccomp_knotif *knotif = NULL, *cur;
|
||||
struct seccomp_knotif *knotif;
|
||||
long ret;
|
||||
|
||||
if (copy_from_user(&resp, buf, sizeof(resp)))
|
||||
@ -1118,13 +1253,7 @@ static long seccomp_notify_send(struct seccomp_filter *filter,
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
list_for_each_entry(cur, &filter->notif->notifications, list) {
|
||||
if (cur->id == resp.id) {
|
||||
knotif = cur;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
knotif = find_notification(filter, resp.id);
|
||||
if (!knotif) {
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
@ -1150,7 +1279,7 @@ out:
|
||||
static long seccomp_notify_id_valid(struct seccomp_filter *filter,
|
||||
void __user *buf)
|
||||
{
|
||||
struct seccomp_knotif *knotif = NULL;
|
||||
struct seccomp_knotif *knotif;
|
||||
u64 id;
|
||||
long ret;
|
||||
|
||||
@ -1161,17 +1290,109 @@ static long seccomp_notify_id_valid(struct seccomp_filter *filter,
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ret = -ENOENT;
|
||||
list_for_each_entry(knotif, &filter->notif->notifications, list) {
|
||||
if (knotif->id == id) {
|
||||
if (knotif->state == SECCOMP_NOTIFY_SENT)
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
knotif = find_notification(filter, id);
|
||||
if (knotif && knotif->state == SECCOMP_NOTIFY_SENT)
|
||||
ret = 0;
|
||||
else
|
||||
ret = -ENOENT;
|
||||
|
||||
mutex_unlock(&filter->notify_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static long seccomp_notify_addfd(struct seccomp_filter *filter,
|
||||
struct seccomp_notif_addfd __user *uaddfd,
|
||||
unsigned int size)
|
||||
{
|
||||
struct seccomp_notif_addfd addfd;
|
||||
struct seccomp_knotif *knotif;
|
||||
struct seccomp_kaddfd kaddfd;
|
||||
int ret;
|
||||
|
||||
BUILD_BUG_ON(sizeof(addfd) < SECCOMP_NOTIFY_ADDFD_SIZE_VER0);
|
||||
BUILD_BUG_ON(sizeof(addfd) != SECCOMP_NOTIFY_ADDFD_SIZE_LATEST);
|
||||
|
||||
if (size < SECCOMP_NOTIFY_ADDFD_SIZE_VER0 || size >= PAGE_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
ret = copy_struct_from_user(&addfd, sizeof(addfd), uaddfd, size);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (addfd.newfd_flags & ~O_CLOEXEC)
|
||||
return -EINVAL;
|
||||
|
||||
if (addfd.flags & ~SECCOMP_ADDFD_FLAG_SETFD)
|
||||
return -EINVAL;
|
||||
|
||||
if (addfd.newfd && !(addfd.flags & SECCOMP_ADDFD_FLAG_SETFD))
|
||||
return -EINVAL;
|
||||
|
||||
kaddfd.file = fget(addfd.srcfd);
|
||||
if (!kaddfd.file)
|
||||
return -EBADF;
|
||||
|
||||
kaddfd.flags = addfd.newfd_flags;
|
||||
kaddfd.fd = (addfd.flags & SECCOMP_ADDFD_FLAG_SETFD) ?
|
||||
addfd.newfd : -1;
|
||||
init_completion(&kaddfd.completion);
|
||||
|
||||
ret = mutex_lock_interruptible(&filter->notify_lock);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
knotif = find_notification(filter, addfd.id);
|
||||
if (!knotif) {
|
||||
ret = -ENOENT;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
out:
|
||||
/*
|
||||
* We do not want to allow for FD injection to occur before the
|
||||
* notification has been picked up by a userspace handler, or after
|
||||
* the notification has been replied to.
|
||||
*/
|
||||
if (knotif->state != SECCOMP_NOTIFY_SENT) {
|
||||
ret = -EINPROGRESS;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
list_add(&kaddfd.list, &knotif->addfd);
|
||||
complete(&knotif->ready);
|
||||
mutex_unlock(&filter->notify_lock);
|
||||
|
||||
/* Now we wait for it to be processed or be interrupted */
|
||||
ret = wait_for_completion_interruptible(&kaddfd.completion);
|
||||
if (ret == 0) {
|
||||
/*
|
||||
* We had a successful completion. The other side has already
|
||||
* removed us from the addfd queue, and
|
||||
* wait_for_completion_interruptible has a memory barrier upon
|
||||
* success that lets us read this value directly without
|
||||
* locking.
|
||||
*/
|
||||
ret = kaddfd.ret;
|
||||
goto out;
|
||||
}
|
||||
|
||||
mutex_lock(&filter->notify_lock);
|
||||
/*
|
||||
* Even though we were woken up by a signal and not a successful
|
||||
* completion, a completion may have happened in the mean time.
|
||||
*
|
||||
* We need to check again if the addfd request has been handled,
|
||||
* and if not, we will remove it from the queue.
|
||||
*/
|
||||
if (list_empty(&kaddfd.list))
|
||||
ret = kaddfd.ret;
|
||||
else
|
||||
list_del(&kaddfd.list);
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&filter->notify_lock);
|
||||
out:
|
||||
fput(kaddfd.file);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1181,13 +1402,22 @@ static long seccomp_notify_ioctl(struct file *file, unsigned int cmd,
|
||||
struct seccomp_filter *filter = file->private_data;
|
||||
void __user *buf = (void __user *)arg;
|
||||
|
||||
/* Fixed-size ioctls */
|
||||
switch (cmd) {
|
||||
case SECCOMP_IOCTL_NOTIF_RECV:
|
||||
return seccomp_notify_recv(filter, buf);
|
||||
case SECCOMP_IOCTL_NOTIF_SEND:
|
||||
return seccomp_notify_send(filter, buf);
|
||||
case SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR:
|
||||
case SECCOMP_IOCTL_NOTIF_ID_VALID:
|
||||
return seccomp_notify_id_valid(filter, buf);
|
||||
}
|
||||
|
||||
/* Extensible Argument ioctls */
|
||||
#define EA_IOCTL(cmd) ((cmd) & ~(IOC_INOUT | IOCSIZE_MASK))
|
||||
switch (EA_IOCTL(cmd)) {
|
||||
case EA_IOCTL(SECCOMP_IOCTL_NOTIF_ADDFD):
|
||||
return seccomp_notify_addfd(filter, buf, _IOC_SIZE(cmd));
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -1200,7 +1430,7 @@ static __poll_t seccomp_notify_poll(struct file *file,
|
||||
__poll_t ret = 0;
|
||||
struct seccomp_knotif *cur;
|
||||
|
||||
poll_wait(file, &filter->notif->wqh, poll_tab);
|
||||
poll_wait(file, &filter->wqh, poll_tab);
|
||||
|
||||
if (mutex_lock_interruptible(&filter->notify_lock) < 0)
|
||||
return EPOLLERR;
|
||||
@ -1216,6 +1446,9 @@ static __poll_t seccomp_notify_poll(struct file *file,
|
||||
|
||||
mutex_unlock(&filter->notify_lock);
|
||||
|
||||
if (refcount_read(&filter->users) == 0)
|
||||
ret |= EPOLLHUP;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1244,7 +1477,6 @@ static struct file *init_listener(struct seccomp_filter *filter)
|
||||
sema_init(&filter->notif->request, 0);
|
||||
filter->notif->next_id = get_random_u64();
|
||||
INIT_LIST_HEAD(&filter->notif->notifications);
|
||||
init_waitqueue_head(&filter->notif->wqh);
|
||||
|
||||
ret = anon_inode_getfile("seccomp notify", &seccomp_notify_ops,
|
||||
filter, O_RDWR);
|
||||
@ -1822,7 +2054,7 @@ static int __init seccomp_sysctl_init(void)
|
||||
|
||||
hdr = register_sysctl_paths(seccomp_sysctl_path, seccomp_sysctl_table);
|
||||
if (!hdr)
|
||||
pr_warn("seccomp: sysctl registration failed\n");
|
||||
pr_warn("sysctl registration failed\n");
|
||||
else
|
||||
kmemleak_not_leak(hdr);
|
||||
|
||||
|
55
net/compat.c
55
net/compat.c
@ -281,39 +281,31 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat
|
||||
return 0;
|
||||
}
|
||||
|
||||
void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm)
|
||||
static int scm_max_fds_compat(struct msghdr *msg)
|
||||
{
|
||||
struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control;
|
||||
int fdmax = (kmsg->msg_controllen - sizeof(struct compat_cmsghdr)) / sizeof(int);
|
||||
int fdnum = scm->fp->count;
|
||||
struct file **fp = scm->fp->fp;
|
||||
int __user *cmfptr;
|
||||
if (msg->msg_controllen <= sizeof(struct compat_cmsghdr))
|
||||
return 0;
|
||||
return (msg->msg_controllen - sizeof(struct compat_cmsghdr)) / sizeof(int);
|
||||
}
|
||||
|
||||
void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm)
|
||||
{
|
||||
struct compat_cmsghdr __user *cm =
|
||||
(struct compat_cmsghdr __user *)msg->msg_control;
|
||||
unsigned int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0;
|
||||
int fdmax = min_t(int, scm_max_fds_compat(msg), scm->fp->count);
|
||||
int __user *cmsg_data = CMSG_USER_DATA(cm);
|
||||
int err = 0, i;
|
||||
|
||||
if (fdnum < fdmax)
|
||||
fdmax = fdnum;
|
||||
|
||||
for (i = 0, cmfptr = (int __user *) CMSG_COMPAT_DATA(cm); i < fdmax; i++, cmfptr++) {
|
||||
int new_fd;
|
||||
err = security_file_receive(fp[i]);
|
||||
if (err)
|
||||
break;
|
||||
err = get_unused_fd_flags(MSG_CMSG_CLOEXEC & kmsg->msg_flags
|
||||
? O_CLOEXEC : 0);
|
||||
for (i = 0; i < fdmax; i++) {
|
||||
err = receive_fd_user(scm->fp->fp[i], cmsg_data + i, o_flags);
|
||||
if (err < 0)
|
||||
break;
|
||||
new_fd = err;
|
||||
err = put_user(new_fd, cmfptr);
|
||||
if (err) {
|
||||
put_unused_fd(new_fd);
|
||||
break;
|
||||
}
|
||||
/* Bump the usage count and install the file. */
|
||||
fd_install(new_fd, get_file(fp[i]));
|
||||
}
|
||||
|
||||
if (i > 0) {
|
||||
int cmlen = CMSG_COMPAT_LEN(i * sizeof(int));
|
||||
|
||||
err = put_user(SOL_SOCKET, &cm->cmsg_level);
|
||||
if (!err)
|
||||
err = put_user(SCM_RIGHTS, &cm->cmsg_type);
|
||||
@ -321,16 +313,19 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm)
|
||||
err = put_user(cmlen, &cm->cmsg_len);
|
||||
if (!err) {
|
||||
cmlen = CMSG_COMPAT_SPACE(i * sizeof(int));
|
||||
kmsg->msg_control += cmlen;
|
||||
kmsg->msg_controllen -= cmlen;
|
||||
if (msg->msg_controllen < cmlen)
|
||||
cmlen = msg->msg_controllen;
|
||||
msg->msg_control += cmlen;
|
||||
msg->msg_controllen -= cmlen;
|
||||
}
|
||||
}
|
||||
if (i < fdnum)
|
||||
kmsg->msg_flags |= MSG_CTRUNC;
|
||||
|
||||
if (i < scm->fp->count || (scm->fp->count && fdmax <= 0))
|
||||
msg->msg_flags |= MSG_CTRUNC;
|
||||
|
||||
/*
|
||||
* All of the files that fit in the message have had their
|
||||
* usage counts incremented, so we just free the list.
|
||||
* All of the files that fit in the message have had their usage counts
|
||||
* incremented, so we just free the list.
|
||||
*/
|
||||
__scm_destroy(scm);
|
||||
}
|
||||
|
@ -280,36 +280,6 @@ void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_inter
|
||||
}
|
||||
EXPORT_SYMBOL(put_cmsg_scm_timestamping);
|
||||
|
||||
/*
 * Hand @file to userspace as a new descriptor and write the descriptor
 * number to @ufd.
 *
 * security_file_receive() is consulted first.  The descriptor is then
 * reserved with @o_flags and only installed once its number has been
 * copied out; if that copy fails the reserved descriptor is released.
 *
 * Returns 0 on success, otherwise the (non-zero) result of the step that
 * failed.
 */
static int __scm_install_fd(struct file *file, int __user *ufd, int o_flags)
{
	struct socket *sock;
	int fd, err;

	err = security_file_receive(file);
	if (err)
		return err;

	fd = get_unused_fd_flags(o_flags);
	if (fd < 0)
		return fd;

	err = put_user(fd, ufd);
	if (err)
		goto out_release_fd;

	/*
	 * Only the returned pointer matters here; the value stored in err
	 * by sock_from_file() is not looked at again.
	 */
	sock = sock_from_file(file, &err);
	if (sock) {
		sock_update_netprioidx(&sock->sk->sk_cgrp_data);
		sock_update_classid(&sock->sk->sk_cgrp_data);
	}

	/* Bump the usage count and install the file. */
	fd_install(fd, get_file(file));
	return 0;

out_release_fd:
	put_unused_fd(fd);
	return err;
}
|
||||
|
||||
static int scm_max_fds(struct msghdr *msg)
|
||||
{
|
||||
if (msg->msg_controllen <= sizeof(struct cmsghdr))
|
||||
@ -319,29 +289,29 @@ static int scm_max_fds(struct msghdr *msg)
|
||||
|
||||
void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
|
||||
{
|
||||
struct cmsghdr __user *cm
|
||||
= (__force struct cmsghdr __user*)msg->msg_control;
|
||||
int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0;
|
||||
struct cmsghdr __user *cm =
|
||||
(__force struct cmsghdr __user *)msg->msg_control;
|
||||
unsigned int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0;
|
||||
int fdmax = min_t(int, scm_max_fds(msg), scm->fp->count);
|
||||
int __user *cmsg_data = CMSG_USER_DATA(cm);
|
||||
int err = 0, i;
|
||||
|
||||
/* no use for FD passing from kernel space callers */
|
||||
if (WARN_ON_ONCE(!msg->msg_control_is_user))
|
||||
return;
|
||||
|
||||
if (msg->msg_flags & MSG_CMSG_COMPAT) {
|
||||
scm_detach_fds_compat(msg, scm);
|
||||
return;
|
||||
}
|
||||
|
||||
/* no use for FD passing from kernel space callers */
|
||||
if (WARN_ON_ONCE(!msg->msg_control_is_user))
|
||||
return;
|
||||
|
||||
for (i = 0; i < fdmax; i++) {
|
||||
err = __scm_install_fd(scm->fp->fp[i], cmsg_data + i, o_flags);
|
||||
if (err)
|
||||
err = receive_fd_user(scm->fp->fp[i], cmsg_data + i, o_flags);
|
||||
if (err < 0)
|
||||
break;
|
||||
}
|
||||
|
||||
if (i > 0) {
|
||||
if (i > 0) {
|
||||
int cmlen = CMSG_LEN(i * sizeof(int));
|
||||
|
||||
err = put_user(SOL_SOCKET, &cm->cmsg_level);
|
||||
|
@ -2842,6 +2842,27 @@ int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *
|
||||
}
|
||||
EXPORT_SYMBOL(sock_no_mmap);
|
||||
|
||||
/*
|
||||
* When a file is received (via SCM_RIGHTS, etc), we must bump the
|
||||
* various sock-based usage counts.
|
||||
*/
|
||||
void __receive_sock(struct file *file)
|
||||
{
|
||||
struct socket *sock;
|
||||
int error;
|
||||
|
||||
/*
|
||||
* The resulting value of "error" is ignored here since we only
|
||||
* need to take action when the file is a socket and testing
|
||||
* "sock" for NULL is sufficient.
|
||||
*/
|
||||
sock = sock_from_file(file, &error);
|
||||
if (sock) {
|
||||
sock_update_netprioidx(&sock->sk->sk_cgrp_data);
|
||||
sock_update_classid(&sock->sk->sk_cgrp_data);
|
||||
}
|
||||
}
|
||||
|
||||
ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
|
||||
{
|
||||
ssize_t res;
|
||||
|
@ -195,8 +195,9 @@
|
||||
*
|
||||
* .. code-block:: c
|
||||
*
|
||||
* FIXTURE_DATA(datatype name)
|
||||
* FIXTURE_DATA(datatype_name)
|
||||
*
|
||||
* Almost always, you want just FIXTURE() instead (see below).
|
||||
* This call may be used when the type of the fixture data
|
||||
* is needed. In general, this should not be needed unless
|
||||
* the *self* is being passed to a helper directly.
|
||||
@ -211,7 +212,7 @@
|
||||
*
|
||||
* .. code-block:: c
|
||||
*
|
||||
* FIXTURE(datatype name) {
|
||||
* FIXTURE(fixture_name) {
|
||||
* type property1;
|
||||
* ...
|
||||
* };
|
||||
@ -238,7 +239,7 @@
|
||||
*
|
||||
* .. code-block:: c
|
||||
*
|
||||
* FIXTURE_SETUP(fixture name) { implementation }
|
||||
* FIXTURE_SETUP(fixture_name) { implementation }
|
||||
*
|
||||
* Populates the required "setup" function for a fixture. An instance of the
|
||||
* datatype defined with FIXTURE_DATA() will be exposed as *self* for the
|
||||
@ -264,7 +265,7 @@
|
||||
*
|
||||
* .. code-block:: c
|
||||
*
|
||||
* FIXTURE_TEARDOWN(fixture name) { implementation }
|
||||
* FIXTURE_TEARDOWN(fixture_name) { implementation }
|
||||
*
|
||||
* Populates the required "teardown" function for a fixture. An instance of the
|
||||
* datatype defined with FIXTURE_DATA() will be exposed as *self* for the
|
||||
@ -285,7 +286,7 @@
|
||||
*
|
||||
* .. code-block:: c
|
||||
*
|
||||
* FIXTURE_VARIANT(datatype name) {
|
||||
* FIXTURE_VARIANT(fixture_name) {
|
||||
* type property1;
|
||||
* ...
|
||||
* };
|
||||
@ -305,8 +306,8 @@
|
||||
*
|
||||
* .. code-block:: c
|
||||
*
|
||||
* FIXTURE_ADD(datatype name) {
|
||||
* .property1 = val1;
|
||||
* FIXTURE_VARIANT_ADD(fixture_name, variant_name) {
|
||||
* .property1 = val1,
|
||||
* ...
|
||||
* };
|
||||
*
|
||||
|
@ -1,2 +1,3 @@
|
||||
CONFIG_SECCOMP=y
|
||||
CONFIG_SECCOMP_FILTER=y
|
||||
CONFIG_USER_NS=y
|
||||
|
@ -18,9 +18,9 @@
|
||||
|
||||
unsigned long long timing(clockid_t clk_id, unsigned long long samples)
|
||||
{
|
||||
pid_t pid, ret;
|
||||
unsigned long long i;
|
||||
struct timespec start, finish;
|
||||
unsigned long long i;
|
||||
pid_t pid, ret;
|
||||
|
||||
pid = getpid();
|
||||
assert(clock_gettime(clk_id, &start) == 0);
|
||||
@ -31,30 +31,43 @@ unsigned long long timing(clockid_t clk_id, unsigned long long samples)
|
||||
assert(clock_gettime(clk_id, &finish) == 0);
|
||||
|
||||
i = finish.tv_sec - start.tv_sec;
|
||||
i *= 1000000000;
|
||||
i *= 1000000000ULL;
|
||||
i += finish.tv_nsec - start.tv_nsec;
|
||||
|
||||
printf("%lu.%09lu - %lu.%09lu = %llu\n",
|
||||
printf("%lu.%09lu - %lu.%09lu = %llu (%.1fs)\n",
|
||||
finish.tv_sec, finish.tv_nsec,
|
||||
start.tv_sec, start.tv_nsec,
|
||||
i);
|
||||
i, (double)i / 1000000000.0);
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
unsigned long long calibrate(void)
|
||||
{
|
||||
unsigned long long i;
|
||||
struct timespec start, finish;
|
||||
unsigned long long i, samples, step = 9973;
|
||||
pid_t pid, ret;
|
||||
int seconds = 15;
|
||||
|
||||
printf("Calibrating reasonable sample size...\n");
|
||||
printf("Calibrating sample size for %d seconds worth of syscalls ...\n", seconds);
|
||||
|
||||
for (i = 5; ; i++) {
|
||||
unsigned long long samples = 1 << i;
|
||||
samples = 0;
|
||||
pid = getpid();
|
||||
assert(clock_gettime(CLOCK_MONOTONIC, &start) == 0);
|
||||
do {
|
||||
for (i = 0; i < step; i++) {
|
||||
ret = syscall(__NR_getpid);
|
||||
assert(pid == ret);
|
||||
}
|
||||
assert(clock_gettime(CLOCK_MONOTONIC, &finish) == 0);
|
||||
|
||||
/* Find something that takes more than 5 seconds to run. */
|
||||
if (timing(CLOCK_REALTIME, samples) / 1000000000ULL > 5)
|
||||
return samples;
|
||||
}
|
||||
samples += step;
|
||||
i = finish.tv_sec - start.tv_sec;
|
||||
i *= 1000000000ULL;
|
||||
i += finish.tv_nsec - start.tv_nsec;
|
||||
} while (i < 1000000000ULL);
|
||||
|
||||
return samples * seconds;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
@ -68,32 +81,55 @@ int main(int argc, char *argv[])
|
||||
};
|
||||
long ret;
|
||||
unsigned long long samples;
|
||||
unsigned long long native, filtered;
|
||||
unsigned long long native, filter1, filter2;
|
||||
|
||||
printf("Current BPF sysctl settings:\n");
|
||||
system("sysctl net.core.bpf_jit_enable");
|
||||
system("sysctl net.core.bpf_jit_harden");
|
||||
|
||||
if (argc > 1)
|
||||
samples = strtoull(argv[1], NULL, 0);
|
||||
else
|
||||
samples = calibrate();
|
||||
|
||||
printf("Benchmarking %llu samples...\n", samples);
|
||||
printf("Benchmarking %llu syscalls...\n", samples);
|
||||
|
||||
/* Native call */
|
||||
native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
|
||||
printf("getpid native: %llu ns\n", native);
|
||||
|
||||
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
|
||||
assert(ret == 0);
|
||||
|
||||
/* One filter */
|
||||
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
|
||||
assert(ret == 0);
|
||||
|
||||
filtered = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
|
||||
printf("getpid RET_ALLOW: %llu ns\n", filtered);
|
||||
filter1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
|
||||
printf("getpid RET_ALLOW 1 filter: %llu ns\n", filter1);
|
||||
|
||||
printf("Estimated seccomp overhead per syscall: %llu ns\n",
|
||||
filtered - native);
|
||||
if (filter1 == native)
|
||||
printf("No overhead measured!? Try running again with more samples.\n");
|
||||
|
||||
if (filtered == native)
|
||||
printf("Trying running again with more samples.\n");
|
||||
/* Two filters */
|
||||
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
|
||||
assert(ret == 0);
|
||||
|
||||
filter2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
|
||||
printf("getpid RET_ALLOW 2 filters: %llu ns\n", filter2);
|
||||
|
||||
/* Calculations */
|
||||
printf("Estimated total seccomp overhead for 1 filter: %llu ns\n",
|
||||
filter1 - native);
|
||||
|
||||
printf("Estimated total seccomp overhead for 2 filters: %llu ns\n",
|
||||
filter2 - native);
|
||||
|
||||
printf("Estimated seccomp per-filter overhead: %llu ns\n",
|
||||
filter2 - filter1);
|
||||
|
||||
printf("Estimated seccomp entry overhead: %llu ns\n",
|
||||
filter1 - native - (filter2 - filter1));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -45,12 +45,19 @@
|
||||
#include <sys/socket.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <linux/kcmp.h>
|
||||
#include <sys/resource.h>
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <poll.h>
|
||||
|
||||
#include "../kselftest_harness.h"
|
||||
#include "../clone3/clone3_selftests.h"
|
||||
|
||||
/* Attempt to de-conflict with the selftests tree. */
|
||||
#ifndef SKIP
|
||||
#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
#ifndef PR_SET_PTRACER
|
||||
# define PR_SET_PTRACER 0x59616d61
|
||||
@ -167,7 +174,9 @@ struct seccomp_metadata {
|
||||
|
||||
#ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
|
||||
#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)
|
||||
#endif
|
||||
|
||||
#ifndef SECCOMP_RET_USER_NOTIF
|
||||
#define SECCOMP_RET_USER_NOTIF 0x7fc00000U
|
||||
|
||||
#define SECCOMP_IOC_MAGIC '!'
|
||||
@ -180,7 +189,7 @@ struct seccomp_metadata {
|
||||
#define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif)
|
||||
#define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \
|
||||
struct seccomp_notif_resp)
|
||||
#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOR(2, __u64)
|
||||
#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64)
|
||||
|
||||
struct seccomp_notif {
|
||||
__u64 id;
|
||||
@ -203,6 +212,39 @@ struct seccomp_notif_sizes {
|
||||
};
|
||||
#endif
|
||||
|
||||
/*
 * Fallback definitions for building against headers that do not yet
 * provide the ADDFD ioctl.
 */
#ifndef SECCOMP_IOCTL_NOTIF_ADDFD
/* On success, the return value is the remote process's added fd number */
#define SECCOMP_IOCTL_NOTIF_ADDFD	SECCOMP_IOW(3, \
						struct seccomp_notif_addfd)

/* valid flags for seccomp_notif_addfd */
#define SECCOMP_ADDFD_FLAG_SETFD	(1UL << 0) /* Specify remote fd */

struct seccomp_notif_addfd {
	__u64 id;		/* notification the request applies to */
	__u32 flags;		/* SECCOMP_ADDFD_FLAG_* bits */
	__u32 srcfd;		/* presumably the fd being donated -- confirm */
	__u32 newfd;		/* honoured only with SECCOMP_ADDFD_FLAG_SETFD */
	__u32 newfd_flags;	/* flags for the fd that gets created */
};
#endif

/*
 * Same ioctl number as SECCOMP_IOCTL_NOTIF_ADDFD, but with an argument
 * that is smaller than struct seccomp_notif_addfd.
 */
struct seccomp_notif_addfd_small {
	__u64 id;
	char weird[4];
};
#define SECCOMP_IOCTL_NOTIF_ADDFD_SMALL	\
	SECCOMP_IOW(3, struct seccomp_notif_addfd_small)

/*
 * Same ioctl number again, with an argument that carries 8 bytes beyond
 * struct seccomp_notif_addfd.
 */
struct seccomp_notif_addfd_big {
	union {
		struct seccomp_notif_addfd addfd;
		char buf[sizeof(struct seccomp_notif_addfd) + 8];
	};
};
#define SECCOMP_IOCTL_NOTIF_ADDFD_BIG	\
	SECCOMP_IOWR(3, struct seccomp_notif_addfd_big)
|
||||
|
||||
#ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY
|
||||
#define PTRACE_EVENTMSG_SYSCALL_ENTRY 1
|
||||
#define PTRACE_EVENTMSG_SYSCALL_EXIT 2
|
||||
@ -236,6 +278,40 @@ int seccomp(unsigned int op, unsigned int flags, void *args)
|
||||
#define SIBLING_EXIT_FAILURE 0xbadface
|
||||
#define SIBLING_EXIT_NEWPRIVS 0xbadfeed
|
||||
|
||||
/*
 * Compare fd1 of pid1 with fd2 of pid2 using kcmp(KCMP_FILE).
 *
 * Returns whatever the kcmp syscall returns, with errno cleared before
 * the call.  When the build headers have no __NR_kcmp, no syscall is
 * attempted: the result is -1 with errno set to ENOSYS.
 */
static int __filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2)
{
#ifndef __NR_kcmp
	errno = ENOSYS;
	return -1;
#else
	errno = 0;
	return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2);
#endif
}
|
||||
|
||||
/*
 * Wrapper around __filecmp() that treats a missing kcmp() syscall as
 * "files match" after logging a note.  It is a macro so that TH_LOG
 * reports the actual location filecmp() is used.
 */
#define filecmp(pid1, pid2, fd1, fd2)	({				\
	int _ret = __filecmp(pid1, pid2, fd1, fd2);			\
									\
	if (_ret < 0 && errno == ENOSYS) {				\
		TH_LOG("kcmp() syscall missing (test is less accurate)");\
		_ret = 0;						\
	}								\
	_ret; })
|
||||
|
||||
TEST(kcmp)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = __filecmp(getpid(), getpid(), 1, 1);
|
||||
EXPECT_EQ(ret, 0);
|
||||
if (ret != 0 && errno == ENOSYS)
|
||||
SKIP(return, "Kernel does not support kcmp() (missing CONFIG_CHECKPOINT_RESTORE?)");
|
||||
}
|
||||
|
||||
TEST(mode_strict_support)
|
||||
{
|
||||
long ret;
|
||||
@ -1470,6 +1546,7 @@ pid_t setup_trace_fixture(struct __test_metadata *_metadata,
|
||||
|
||||
return tracer_pid;
|
||||
}
|
||||
|
||||
void teardown_trace_fixture(struct __test_metadata *_metadata,
|
||||
pid_t tracer)
|
||||
{
|
||||
@ -1750,7 +1827,7 @@ void change_syscall(struct __test_metadata *_metadata,
|
||||
EXPECT_EQ(0, ret);
|
||||
}
|
||||
|
||||
void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
|
||||
void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
|
||||
int status, void *args)
|
||||
{
|
||||
int ret;
|
||||
@ -1827,6 +1904,24 @@ FIXTURE(TRACE_syscall) {
|
||||
pid_t tracer, mytid, mypid, parent;
|
||||
};
|
||||
|
||||
FIXTURE_VARIANT(TRACE_syscall) {
|
||||
/*
|
||||
* All of the SECCOMP_RET_TRACE behaviors can be tested with either
|
||||
* SECCOMP_RET_TRACE+PTRACE_CONT or plain ptrace()+PTRACE_SYSCALL.
|
||||
* This indicates if we should use SECCOMP_RET_TRACE (false), or
|
||||
* ptrace (true).
|
||||
*/
|
||||
bool use_ptrace;
|
||||
};
|
||||
|
||||
FIXTURE_VARIANT_ADD(TRACE_syscall, ptrace) {
|
||||
.use_ptrace = true,
|
||||
};
|
||||
|
||||
FIXTURE_VARIANT_ADD(TRACE_syscall, seccomp) {
|
||||
.use_ptrace = false,
|
||||
};
|
||||
|
||||
FIXTURE_SETUP(TRACE_syscall)
|
||||
{
|
||||
struct sock_filter filter[] = {
|
||||
@ -1842,12 +1937,11 @@ FIXTURE_SETUP(TRACE_syscall)
|
||||
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005),
|
||||
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
|
||||
};
|
||||
|
||||
memset(&self->prog, 0, sizeof(self->prog));
|
||||
self->prog.filter = malloc(sizeof(filter));
|
||||
ASSERT_NE(NULL, self->prog.filter);
|
||||
memcpy(self->prog.filter, filter, sizeof(filter));
|
||||
self->prog.len = (unsigned short)ARRAY_SIZE(filter);
|
||||
struct sock_fprog prog = {
|
||||
.len = (unsigned short)ARRAY_SIZE(filter),
|
||||
.filter = filter,
|
||||
};
|
||||
long ret;
|
||||
|
||||
/* Prepare some testable syscall results. */
|
||||
self->mytid = syscall(__NR_gettid);
|
||||
@ -1865,60 +1959,48 @@ FIXTURE_SETUP(TRACE_syscall)
|
||||
ASSERT_NE(self->parent, self->mypid);
|
||||
|
||||
/* Launch tracer. */
|
||||
self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL,
|
||||
false);
|
||||
self->tracer = setup_trace_fixture(_metadata,
|
||||
variant->use_ptrace ? tracer_ptrace
|
||||
: tracer_seccomp,
|
||||
NULL, variant->use_ptrace);
|
||||
|
||||
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
|
||||
ASSERT_EQ(0, ret);
|
||||
|
||||
if (variant->use_ptrace)
|
||||
return;
|
||||
|
||||
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
|
||||
ASSERT_EQ(0, ret);
|
||||
}
|
||||
|
||||
FIXTURE_TEARDOWN(TRACE_syscall)
|
||||
{
|
||||
teardown_trace_fixture(_metadata, self->tracer);
|
||||
if (self->prog.filter)
|
||||
free(self->prog.filter);
|
||||
}
|
||||
|
||||
TEST_F(TRACE_syscall, ptrace_syscall_redirected)
|
||||
TEST(negative_ENOSYS)
|
||||
{
|
||||
/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
|
||||
teardown_trace_fixture(_metadata, self->tracer);
|
||||
self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
|
||||
true);
|
||||
|
||||
/* Tracer will redirect getpid to getppid. */
|
||||
EXPECT_NE(self->mypid, syscall(__NR_getpid));
|
||||
/*
|
||||
* There should be no difference between an "internal" skip
|
||||
* and userspace asking for syscall "-1".
|
||||
*/
|
||||
errno = 0;
|
||||
EXPECT_EQ(-1, syscall(-1));
|
||||
EXPECT_EQ(errno, ENOSYS);
|
||||
/* And no difference for "still not valid but not -1". */
|
||||
errno = 0;
|
||||
EXPECT_EQ(-1, syscall(-101));
|
||||
EXPECT_EQ(errno, ENOSYS);
|
||||
}
|
||||
|
||||
TEST_F(TRACE_syscall, ptrace_syscall_errno)
|
||||
TEST_F(TRACE_syscall, negative_ENOSYS)
|
||||
{
|
||||
/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
|
||||
teardown_trace_fixture(_metadata, self->tracer);
|
||||
self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
|
||||
true);
|
||||
|
||||
/* Tracer should skip the open syscall, resulting in ESRCH. */
|
||||
EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
|
||||
}
|
||||
|
||||
TEST_F(TRACE_syscall, ptrace_syscall_faked)
|
||||
{
|
||||
/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
|
||||
teardown_trace_fixture(_metadata, self->tracer);
|
||||
self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
|
||||
true);
|
||||
|
||||
/* Tracer should skip the gettid syscall, resulting fake pid. */
|
||||
EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
|
||||
negative_ENOSYS(_metadata);
|
||||
}
|
||||
|
||||
TEST_F(TRACE_syscall, syscall_allowed)
|
||||
{
|
||||
long ret;
|
||||
|
||||
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
|
||||
ASSERT_EQ(0, ret);
|
||||
|
||||
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
|
||||
ASSERT_EQ(0, ret);
|
||||
|
||||
/* getppid works as expected (no changes). */
|
||||
EXPECT_EQ(self->parent, syscall(__NR_getppid));
|
||||
EXPECT_NE(self->mypid, syscall(__NR_getppid));
|
||||
@ -1926,14 +2008,6 @@ TEST_F(TRACE_syscall, syscall_allowed)
|
||||
|
||||
TEST_F(TRACE_syscall, syscall_redirected)
|
||||
{
|
||||
long ret;
|
||||
|
||||
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
|
||||
ASSERT_EQ(0, ret);
|
||||
|
||||
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
|
||||
ASSERT_EQ(0, ret);
|
||||
|
||||
/* getpid has been redirected to getppid as expected. */
|
||||
EXPECT_EQ(self->parent, syscall(__NR_getpid));
|
||||
EXPECT_NE(self->mypid, syscall(__NR_getpid));
|
||||
@ -1941,33 +2015,17 @@ TEST_F(TRACE_syscall, syscall_redirected)
|
||||
|
||||
TEST_F(TRACE_syscall, syscall_errno)
|
||||
{
|
||||
long ret;
|
||||
|
||||
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
|
||||
ASSERT_EQ(0, ret);
|
||||
|
||||
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
|
||||
ASSERT_EQ(0, ret);
|
||||
|
||||
/* openat has been skipped and an errno return. */
|
||||
/* Tracer should skip the open syscall, resulting in ESRCH. */
|
||||
EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
|
||||
}
|
||||
|
||||
TEST_F(TRACE_syscall, syscall_faked)
|
||||
{
|
||||
long ret;
|
||||
|
||||
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
|
||||
ASSERT_EQ(0, ret);
|
||||
|
||||
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
|
||||
ASSERT_EQ(0, ret);
|
||||
|
||||
/* gettid has been skipped and an altered return value stored. */
|
||||
/* Tracer skips the gettid syscall and stores an altered return value. */
|
||||
EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
|
||||
}
|
||||
|
||||
TEST_F(TRACE_syscall, skip_after_RET_TRACE)
|
||||
TEST_F(TRACE_syscall, skip_after)
|
||||
{
|
||||
struct sock_filter filter[] = {
|
||||
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
|
||||
@ -1982,14 +2040,7 @@ TEST_F(TRACE_syscall, skip_after_RET_TRACE)
|
||||
};
|
||||
long ret;
|
||||
|
||||
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
|
||||
ASSERT_EQ(0, ret);
|
||||
|
||||
/* Install fixture filter. */
|
||||
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
|
||||
ASSERT_EQ(0, ret);
|
||||
|
||||
/* Install "errno on getppid" filter. */
|
||||
/* Install additional "errno on getppid" filter. */
|
||||
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
|
||||
ASSERT_EQ(0, ret);
|
||||
|
||||
@ -1999,7 +2050,7 @@ TEST_F(TRACE_syscall, skip_after_RET_TRACE)
|
||||
EXPECT_EQ(EPERM, errno);
|
||||
}
|
||||
|
||||
TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
|
||||
TEST_F_SIGNAL(TRACE_syscall, kill_after, SIGSYS)
|
||||
{
|
||||
struct sock_filter filter[] = {
|
||||
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
|
||||
@ -2014,77 +2065,7 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
|
||||
};
|
||||
long ret;
|
||||
|
||||
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
|
||||
ASSERT_EQ(0, ret);
|
||||
|
||||
/* Install fixture filter. */
|
||||
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
|
||||
ASSERT_EQ(0, ret);
|
||||
|
||||
/* Install "death on getppid" filter. */
|
||||
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
|
||||
ASSERT_EQ(0, ret);
|
||||
|
||||
/* Tracer will redirect getpid to getppid, and we should die. */
|
||||
EXPECT_NE(self->mypid, syscall(__NR_getpid));
|
||||
}
|
||||
|
||||
TEST_F(TRACE_syscall, skip_after_ptrace)
|
||||
{
|
||||
struct sock_filter filter[] = {
|
||||
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
|
||||
offsetof(struct seccomp_data, nr)),
|
||||
BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
|
||||
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
|
||||
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
|
||||
};
|
||||
struct sock_fprog prog = {
|
||||
.len = (unsigned short)ARRAY_SIZE(filter),
|
||||
.filter = filter,
|
||||
};
|
||||
long ret;
|
||||
|
||||
/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
|
||||
teardown_trace_fixture(_metadata, self->tracer);
|
||||
self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
|
||||
true);
|
||||
|
||||
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
|
||||
ASSERT_EQ(0, ret);
|
||||
|
||||
/* Install "errno on getppid" filter. */
|
||||
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
|
||||
ASSERT_EQ(0, ret);
|
||||
|
||||
/* Tracer will redirect getpid to getppid, and we should see EPERM. */
|
||||
EXPECT_EQ(-1, syscall(__NR_getpid));
|
||||
EXPECT_EQ(EPERM, errno);
|
||||
}
|
||||
|
||||
TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
|
||||
{
|
||||
struct sock_filter filter[] = {
|
||||
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
|
||||
offsetof(struct seccomp_data, nr)),
|
||||
BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
|
||||
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
|
||||
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
|
||||
};
|
||||
struct sock_fprog prog = {
|
||||
.len = (unsigned short)ARRAY_SIZE(filter),
|
||||
.filter = filter,
|
||||
};
|
||||
long ret;
|
||||
|
||||
/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
|
||||
teardown_trace_fixture(_metadata, self->tracer);
|
||||
self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
|
||||
true);
|
||||
|
||||
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
|
||||
ASSERT_EQ(0, ret);
|
||||
|
||||
/* Install "death on getppid" filter. */
|
||||
/* Install additional "death on getppid" filter. */
|
||||
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
|
||||
ASSERT_EQ(0, ret);
|
||||
|
||||
@ -3069,7 +3050,7 @@ TEST(get_metadata)
|
||||
|
||||
/* Only real root can get metadata. */
|
||||
if (geteuid()) {
|
||||
XFAIL(return, "get_metadata requires real root");
|
||||
SKIP(return, "get_metadata requires real root");
|
||||
return;
|
||||
}
|
||||
|
||||
@ -3112,7 +3093,7 @@ TEST(get_metadata)
|
||||
ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
|
||||
EXPECT_EQ(sizeof(md), ret) {
|
||||
if (errno == EINVAL)
|
||||
XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
|
||||
SKIP(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
|
||||
}
|
||||
|
||||
EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
|
||||
@ -3128,7 +3109,7 @@ skip:
|
||||
ASSERT_EQ(0, kill(pid, SIGKILL));
|
||||
}
|
||||
|
||||
static int user_trap_syscall(int nr, unsigned int flags)
|
||||
static int user_notif_syscall(int nr, unsigned int flags)
|
||||
{
|
||||
struct sock_filter filter[] = {
|
||||
BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
|
||||
@ -3174,7 +3155,7 @@ TEST(user_notification_basic)
|
||||
|
||||
/* Check that we get -ENOSYS with no listener attached */
|
||||
if (pid == 0) {
|
||||
if (user_trap_syscall(__NR_getppid, 0) < 0)
|
||||
if (user_notif_syscall(__NR_getppid, 0) < 0)
|
||||
exit(1);
|
||||
ret = syscall(__NR_getppid);
|
||||
exit(ret >= 0 || errno != ENOSYS);
|
||||
@ -3191,13 +3172,13 @@ TEST(user_notification_basic)
|
||||
EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
|
||||
|
||||
/* Check that the basic notification machinery works */
|
||||
listener = user_trap_syscall(__NR_getppid,
|
||||
SECCOMP_FILTER_FLAG_NEW_LISTENER);
|
||||
listener = user_notif_syscall(__NR_getppid,
|
||||
SECCOMP_FILTER_FLAG_NEW_LISTENER);
|
||||
ASSERT_GE(listener, 0);
|
||||
|
||||
/* Installing a second listener in the chain should EBUSY */
|
||||
EXPECT_EQ(user_trap_syscall(__NR_getppid,
|
||||
SECCOMP_FILTER_FLAG_NEW_LISTENER),
|
||||
EXPECT_EQ(user_notif_syscall(__NR_getppid,
|
||||
SECCOMP_FILTER_FLAG_NEW_LISTENER),
|
||||
-1);
|
||||
EXPECT_EQ(errno, EBUSY);
|
||||
|
||||
@ -3258,15 +3239,20 @@ TEST(user_notification_with_tsync)
|
||||
int ret;
|
||||
unsigned int flags;
|
||||
|
||||
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
|
||||
ASSERT_EQ(0, ret) {
|
||||
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
|
||||
}
|
||||
|
||||
/* these were exclusive */
|
||||
flags = SECCOMP_FILTER_FLAG_NEW_LISTENER |
|
||||
SECCOMP_FILTER_FLAG_TSYNC;
|
||||
ASSERT_EQ(-1, user_trap_syscall(__NR_getppid, flags));
|
||||
ASSERT_EQ(-1, user_notif_syscall(__NR_getppid, flags));
|
||||
ASSERT_EQ(EINVAL, errno);
|
||||
|
||||
/* but now they're not */
|
||||
flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
|
||||
ret = user_trap_syscall(__NR_getppid, flags);
|
||||
ret = user_notif_syscall(__NR_getppid, flags);
|
||||
close(ret);
|
||||
ASSERT_LE(0, ret);
|
||||
}
|
||||
@ -3284,8 +3270,8 @@ TEST(user_notification_kill_in_middle)
|
||||
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
|
||||
}
|
||||
|
||||
listener = user_trap_syscall(__NR_getppid,
|
||||
SECCOMP_FILTER_FLAG_NEW_LISTENER);
|
||||
listener = user_notif_syscall(__NR_getppid,
|
||||
SECCOMP_FILTER_FLAG_NEW_LISTENER);
|
||||
ASSERT_GE(listener, 0);
|
||||
|
||||
/*
|
||||
@ -3338,8 +3324,8 @@ TEST(user_notification_signal)
|
||||
|
||||
ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
|
||||
|
||||
listener = user_trap_syscall(__NR_gettid,
|
||||
SECCOMP_FILTER_FLAG_NEW_LISTENER);
|
||||
listener = user_notif_syscall(__NR_gettid,
|
||||
SECCOMP_FILTER_FLAG_NEW_LISTENER);
|
||||
ASSERT_GE(listener, 0);
|
||||
|
||||
pid = fork();
|
||||
@ -3408,8 +3394,8 @@ TEST(user_notification_closed_listener)
|
||||
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
|
||||
}
|
||||
|
||||
listener = user_trap_syscall(__NR_getppid,
|
||||
SECCOMP_FILTER_FLAG_NEW_LISTENER);
|
||||
listener = user_notif_syscall(__NR_getppid,
|
||||
SECCOMP_FILTER_FLAG_NEW_LISTENER);
|
||||
ASSERT_GE(listener, 0);
|
||||
|
||||
/*
|
||||
@ -3440,10 +3426,13 @@ TEST(user_notification_child_pid_ns)
|
||||
struct seccomp_notif req = {};
|
||||
struct seccomp_notif_resp resp = {};
|
||||
|
||||
ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0);
|
||||
ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0) {
|
||||
if (errno == EINVAL)
|
||||
SKIP(return, "kernel missing CLONE_NEWUSER support");
|
||||
};
|
||||
|
||||
listener = user_trap_syscall(__NR_getppid,
|
||||
SECCOMP_FILTER_FLAG_NEW_LISTENER);
|
||||
listener = user_notif_syscall(__NR_getppid,
|
||||
SECCOMP_FILTER_FLAG_NEW_LISTENER);
|
||||
ASSERT_GE(listener, 0);
|
||||
|
||||
pid = fork();
|
||||
@ -3482,8 +3471,8 @@ TEST(user_notification_sibling_pid_ns)
|
||||
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
|
||||
}
|
||||
|
||||
listener = user_trap_syscall(__NR_getppid,
|
||||
SECCOMP_FILTER_FLAG_NEW_LISTENER);
|
||||
listener = user_notif_syscall(__NR_getppid,
|
||||
SECCOMP_FILTER_FLAG_NEW_LISTENER);
|
||||
ASSERT_GE(listener, 0);
|
||||
|
||||
pid = fork();
|
||||
@ -3505,7 +3494,10 @@ TEST(user_notification_sibling_pid_ns)
|
||||
}
|
||||
|
||||
/* Create the sibling ns, and sibling in it. */
|
||||
ASSERT_EQ(unshare(CLONE_NEWPID), 0);
|
||||
ASSERT_EQ(unshare(CLONE_NEWPID), 0) {
|
||||
if (errno == EPERM)
|
||||
SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN");
|
||||
}
|
||||
ASSERT_EQ(errno, 0);
|
||||
|
||||
pid2 = fork();
|
||||
@ -3547,8 +3539,8 @@ TEST(user_notification_fault_recv)
|
||||
|
||||
ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
|
||||
|
||||
listener = user_trap_syscall(__NR_getppid,
|
||||
SECCOMP_FILTER_FLAG_NEW_LISTENER);
|
||||
listener = user_notif_syscall(__NR_getppid,
|
||||
SECCOMP_FILTER_FLAG_NEW_LISTENER);
|
||||
ASSERT_GE(listener, 0);
|
||||
|
||||
pid = fork();
|
||||
@ -3585,16 +3577,6 @@ TEST(seccomp_get_notif_sizes)
|
||||
EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp));
|
||||
}
|
||||
|
||||
/*
 * Compare fd1 in process pid1 with fd2 in process pid2 using the kcmp()
 * syscall in KCMP_FILE mode and hand back the raw syscall result.
 *
 * When the build headers do not provide __NR_kcmp the comparison cannot be
 * attempted: errno is set to ENOSYS and -1 is returned.
 */
static int filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2)
{
#ifndef __NR_kcmp
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2);
#endif
}
|
||||
|
||||
TEST(user_notification_continue)
|
||||
{
|
||||
pid_t pid;
|
||||
@ -3609,7 +3591,7 @@ TEST(user_notification_continue)
|
||||
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
|
||||
}
|
||||
|
||||
listener = user_trap_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
|
||||
listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
|
||||
ASSERT_GE(listener, 0);
|
||||
|
||||
pid = fork();
|
||||
@ -3619,20 +3601,14 @@ TEST(user_notification_continue)
|
||||
int dup_fd, pipe_fds[2];
|
||||
pid_t self;
|
||||
|
||||
ret = pipe(pipe_fds);
|
||||
if (ret < 0)
|
||||
exit(1);
|
||||
ASSERT_GE(pipe(pipe_fds), 0);
|
||||
|
||||
dup_fd = dup(pipe_fds[0]);
|
||||
if (dup_fd < 0)
|
||||
exit(1);
|
||||
ASSERT_GE(dup_fd, 0);
|
||||
EXPECT_NE(pipe_fds[0], dup_fd);
|
||||
|
||||
self = getpid();
|
||||
|
||||
ret = filecmp(self, self, pipe_fds[0], dup_fd);
|
||||
if (ret)
|
||||
exit(2);
|
||||
|
||||
ASSERT_EQ(filecmp(self, self, pipe_fds[0], dup_fd), 0);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
@ -3673,7 +3649,7 @@ TEST(user_notification_continue)
|
||||
resp.val = 0;
|
||||
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) {
|
||||
if (errno == EINVAL)
|
||||
XFAIL(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE");
|
||||
SKIP(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE");
|
||||
}
|
||||
|
||||
skip:
|
||||
@ -3681,15 +3657,342 @@ skip:
|
||||
EXPECT_EQ(true, WIFEXITED(status));
|
||||
EXPECT_EQ(0, WEXITSTATUS(status)) {
|
||||
if (WEXITSTATUS(status) == 2) {
|
||||
XFAIL(return, "Kernel does not support kcmp() syscall");
|
||||
SKIP(return, "Kernel does not support kcmp() syscall");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(user_notification_filter_empty)
|
||||
{
|
||||
pid_t pid;
|
||||
long ret;
|
||||
int status;
|
||||
struct pollfd pollfd;
|
||||
struct clone_args args = {
|
||||
.flags = CLONE_FILES,
|
||||
.exit_signal = SIGCHLD,
|
||||
};
|
||||
|
||||
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
|
||||
ASSERT_EQ(0, ret) {
|
||||
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
|
||||
}
|
||||
|
||||
pid = sys_clone3(&args, sizeof(args));
|
||||
ASSERT_GE(pid, 0);
|
||||
|
||||
if (pid == 0) {
|
||||
int listener;
|
||||
|
||||
listener = user_notif_syscall(__NR_mknod, SECCOMP_FILTER_FLAG_NEW_LISTENER);
|
||||
if (listener < 0)
|
||||
_exit(EXIT_FAILURE);
|
||||
|
||||
if (dup2(listener, 200) != 200)
|
||||
_exit(EXIT_FAILURE);
|
||||
|
||||
close(listener);
|
||||
|
||||
_exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
EXPECT_EQ(waitpid(pid, &status, 0), pid);
|
||||
EXPECT_EQ(true, WIFEXITED(status));
|
||||
EXPECT_EQ(0, WEXITSTATUS(status));
|
||||
|
||||
/*
|
||||
* The seccomp filter has become unused so we should be notified once
|
||||
* the kernel gets around to cleaning up task struct.
|
||||
*/
|
||||
pollfd.fd = 200;
|
||||
pollfd.events = POLLHUP;
|
||||
|
||||
EXPECT_GT(poll(&pollfd, 1, 2000), 0);
|
||||
EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
|
||||
}
|
||||
|
||||
/*
 * Thread start routine that performs no work: the argument is ignored and
 * NULL is returned straight away.
 */
static void *do_thread(void *data)
{
	(void)data;

	return NULL;
}
|
||||
|
||||
TEST(user_notification_filter_empty_threaded)
|
||||
{
|
||||
pid_t pid;
|
||||
long ret;
|
||||
int status;
|
||||
struct pollfd pollfd;
|
||||
struct clone_args args = {
|
||||
.flags = CLONE_FILES,
|
||||
.exit_signal = SIGCHLD,
|
||||
};
|
||||
|
||||
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
|
||||
ASSERT_EQ(0, ret) {
|
||||
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
|
||||
}
|
||||
|
||||
pid = sys_clone3(&args, sizeof(args));
|
||||
ASSERT_GE(pid, 0);
|
||||
|
||||
if (pid == 0) {
|
||||
pid_t pid1, pid2;
|
||||
int listener, status;
|
||||
pthread_t thread;
|
||||
|
||||
listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
|
||||
if (listener < 0)
|
||||
_exit(EXIT_FAILURE);
|
||||
|
||||
if (dup2(listener, 200) != 200)
|
||||
_exit(EXIT_FAILURE);
|
||||
|
||||
close(listener);
|
||||
|
||||
pid1 = fork();
|
||||
if (pid1 < 0)
|
||||
_exit(EXIT_FAILURE);
|
||||
|
||||
if (pid1 == 0)
|
||||
_exit(EXIT_SUCCESS);
|
||||
|
||||
pid2 = fork();
|
||||
if (pid2 < 0)
|
||||
_exit(EXIT_FAILURE);
|
||||
|
||||
if (pid2 == 0)
|
||||
_exit(EXIT_SUCCESS);
|
||||
|
||||
if (pthread_create(&thread, NULL, do_thread, NULL) ||
|
||||
pthread_join(thread, NULL))
|
||||
_exit(EXIT_FAILURE);
|
||||
|
||||
if (pthread_create(&thread, NULL, do_thread, NULL) ||
|
||||
pthread_join(thread, NULL))
|
||||
_exit(EXIT_FAILURE);
|
||||
|
||||
if (waitpid(pid1, &status, 0) != pid1 || !WIFEXITED(status) ||
|
||||
WEXITSTATUS(status))
|
||||
_exit(EXIT_FAILURE);
|
||||
|
||||
if (waitpid(pid2, &status, 0) != pid2 || !WIFEXITED(status) ||
|
||||
WEXITSTATUS(status))
|
||||
_exit(EXIT_FAILURE);
|
||||
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
EXPECT_EQ(waitpid(pid, &status, 0), pid);
|
||||
EXPECT_EQ(true, WIFEXITED(status));
|
||||
EXPECT_EQ(0, WEXITSTATUS(status));
|
||||
|
||||
/*
|
||||
* The seccomp filter has become unused so we should be notified once
|
||||
* the kernel gets around to cleaning up task struct.
|
||||
*/
|
||||
pollfd.fd = 200;
|
||||
pollfd.events = POLLHUP;
|
||||
|
||||
EXPECT_GT(poll(&pollfd, 1, 2000), 0);
|
||||
EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
|
||||
}
|
||||
|
||||
TEST(user_notification_addfd)
|
||||
{
|
||||
pid_t pid;
|
||||
long ret;
|
||||
int status, listener, memfd, fd;
|
||||
struct seccomp_notif_addfd addfd = {};
|
||||
struct seccomp_notif_addfd_small small = {};
|
||||
struct seccomp_notif_addfd_big big = {};
|
||||
struct seccomp_notif req = {};
|
||||
struct seccomp_notif_resp resp = {};
|
||||
/* 100 ms */
|
||||
struct timespec delay = { .tv_nsec = 100000000 };
|
||||
|
||||
memfd = memfd_create("test", 0);
|
||||
ASSERT_GE(memfd, 0);
|
||||
|
||||
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
|
||||
ASSERT_EQ(0, ret) {
|
||||
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
|
||||
}
|
||||
|
||||
/* Check that the basic notification machinery works */
|
||||
listener = user_notif_syscall(__NR_getppid,
|
||||
SECCOMP_FILTER_FLAG_NEW_LISTENER);
|
||||
ASSERT_GE(listener, 0);
|
||||
|
||||
pid = fork();
|
||||
ASSERT_GE(pid, 0);
|
||||
|
||||
if (pid == 0) {
|
||||
if (syscall(__NR_getppid) != USER_NOTIF_MAGIC)
|
||||
exit(1);
|
||||
exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
|
||||
}
|
||||
|
||||
ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
|
||||
|
||||
addfd.srcfd = memfd;
|
||||
addfd.newfd = 0;
|
||||
addfd.id = req.id;
|
||||
addfd.flags = 0x0;
|
||||
|
||||
/* Verify bad newfd_flags cannot be set */
|
||||
addfd.newfd_flags = ~O_CLOEXEC;
|
||||
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
|
||||
EXPECT_EQ(errno, EINVAL);
|
||||
addfd.newfd_flags = O_CLOEXEC;
|
||||
|
||||
/* Verify bad flags cannot be set */
|
||||
addfd.flags = 0xff;
|
||||
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
|
||||
EXPECT_EQ(errno, EINVAL);
|
||||
addfd.flags = 0;
|
||||
|
||||
/* Verify that remote_fd cannot be set without setting flags */
|
||||
addfd.newfd = 1;
|
||||
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
|
||||
EXPECT_EQ(errno, EINVAL);
|
||||
addfd.newfd = 0;
|
||||
|
||||
/* Verify small size cannot be set */
|
||||
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_SMALL, &small), -1);
|
||||
EXPECT_EQ(errno, EINVAL);
|
||||
|
||||
/* Verify we can't send bits filled in unknown buffer area */
|
||||
memset(&big, 0xAA, sizeof(big));
|
||||
big.addfd = addfd;
|
||||
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big), -1);
|
||||
EXPECT_EQ(errno, E2BIG);
|
||||
|
||||
|
||||
/* Verify we can set an arbitrary remote fd */
|
||||
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
|
||||
/*
|
||||
* The child has fds 0(stdin), 1(stdout), 2(stderr), 3(memfd),
|
||||
* 4(listener), so the newly allocated fd should be 5.
|
||||
*/
|
||||
EXPECT_EQ(fd, 5);
|
||||
EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
|
||||
|
||||
/* Verify we can set an arbitrary remote fd with large size */
|
||||
memset(&big, 0x0, sizeof(big));
|
||||
big.addfd = addfd;
|
||||
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big);
|
||||
EXPECT_EQ(fd, 6);
|
||||
|
||||
/* Verify we can set a specific remote fd */
|
||||
addfd.newfd = 42;
|
||||
addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
|
||||
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
|
||||
EXPECT_EQ(fd, 42);
|
||||
EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
|
||||
|
||||
/* Resume syscall */
|
||||
resp.id = req.id;
|
||||
resp.error = 0;
|
||||
resp.val = USER_NOTIF_MAGIC;
|
||||
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
|
||||
|
||||
/*
|
||||
* This sets the ID of the ADD FD to the last request plus 1. The
|
||||
* notification ID increments 1 per notification.
|
||||
*/
|
||||
addfd.id = req.id + 1;
|
||||
|
||||
/* This spins until the underlying notification is generated */
|
||||
while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 &&
|
||||
errno != -EINPROGRESS)
|
||||
nanosleep(&delay, NULL);
|
||||
|
||||
memset(&req, 0, sizeof(req));
|
||||
ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
|
||||
ASSERT_EQ(addfd.id, req.id);
|
||||
|
||||
resp.id = req.id;
|
||||
resp.error = 0;
|
||||
resp.val = USER_NOTIF_MAGIC;
|
||||
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
|
||||
|
||||
/* Wait for child to finish. */
|
||||
EXPECT_EQ(waitpid(pid, &status, 0), pid);
|
||||
EXPECT_EQ(true, WIFEXITED(status));
|
||||
EXPECT_EQ(0, WEXITSTATUS(status));
|
||||
|
||||
close(memfd);
|
||||
}
|
||||
|
||||
TEST(user_notification_addfd_rlimit)
|
||||
{
|
||||
pid_t pid;
|
||||
long ret;
|
||||
int status, listener, memfd;
|
||||
struct seccomp_notif_addfd addfd = {};
|
||||
struct seccomp_notif req = {};
|
||||
struct seccomp_notif_resp resp = {};
|
||||
const struct rlimit lim = {
|
||||
.rlim_cur = 0,
|
||||
.rlim_max = 0,
|
||||
};
|
||||
|
||||
memfd = memfd_create("test", 0);
|
||||
ASSERT_GE(memfd, 0);
|
||||
|
||||
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
|
||||
ASSERT_EQ(0, ret) {
|
||||
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
|
||||
}
|
||||
|
||||
/* Check that the basic notification machinery works */
|
||||
listener = user_notif_syscall(__NR_getppid,
|
||||
SECCOMP_FILTER_FLAG_NEW_LISTENER);
|
||||
ASSERT_GE(listener, 0);
|
||||
|
||||
pid = fork();
|
||||
ASSERT_GE(pid, 0);
|
||||
|
||||
if (pid == 0)
|
||||
exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
|
||||
|
||||
|
||||
ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
|
||||
|
||||
ASSERT_EQ(prlimit(pid, RLIMIT_NOFILE, &lim, NULL), 0);
|
||||
|
||||
addfd.srcfd = memfd;
|
||||
addfd.newfd_flags = O_CLOEXEC;
|
||||
addfd.newfd = 0;
|
||||
addfd.id = req.id;
|
||||
addfd.flags = 0;
|
||||
|
||||
/* Should probably spot check /proc/sys/fs/file-nr */
|
||||
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
|
||||
EXPECT_EQ(errno, EMFILE);
|
||||
|
||||
addfd.newfd = 100;
|
||||
addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
|
||||
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
|
||||
EXPECT_EQ(errno, EBADF);
|
||||
|
||||
resp.id = req.id;
|
||||
resp.error = 0;
|
||||
resp.val = USER_NOTIF_MAGIC;
|
||||
|
||||
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
|
||||
|
||||
/* Wait for child to finish. */
|
||||
EXPECT_EQ(waitpid(pid, &status, 0), pid);
|
||||
EXPECT_EQ(true, WIFEXITED(status));
|
||||
EXPECT_EQ(0, WEXITSTATUS(status));
|
||||
|
||||
close(memfd);
|
||||
}
|
||||
|
||||
/*
|
||||
* TODO:
|
||||
* - add microbenchmarks
|
||||
* - expand NNP testing
|
||||
* - better arch-specific TRACE and TRAP handlers.
|
||||
* - endianness checking when appropriate
|
||||
@ -3697,7 +4000,6 @@ skip:
|
||||
* - arch value testing (x86 modes especially)
|
||||
* - verify that FILTER_FLAG_LOG filters generate log messages
|
||||
* - verify that RET_LOG generates log messages
|
||||
* - ...
|
||||
*/
|
||||
|
||||
TEST_HARNESS_MAIN
|
||||
|
1
tools/testing/selftests/seccomp/settings
Normal file
1
tools/testing/selftests/seccomp/settings
Normal file
@ -0,0 +1 @@
|
||||
timeout=90
|
Loading…
Reference in New Issue
Block a user