From ecaaa55c9fa5e8058445a8b891070b12208cdb6d Mon Sep 17 00:00:00 2001 From: Terry Tritton Date: Wed, 24 Jan 2024 14:13:55 +0000 Subject: [PATCH 1/5] selftests/seccomp: Handle EINVAL on unshare(CLONE_NEWPID) unshare(CLONE_NEWPID) can return EINVAL if the kernel does not have the CONFIG_PID_NS option enabled. Add a check on these calls to skip the test if we receive EINVAL. Signed-off-by: Terry Tritton Link: https://lore.kernel.org/r/20240124141357.1243457-2-terry.tritton@linaro.org Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 38f651469968..5e705674b706 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -3709,7 +3709,12 @@ TEST(user_notification_sibling_pid_ns) ASSERT_GE(pid, 0); if (pid == 0) { - ASSERT_EQ(unshare(CLONE_NEWPID), 0); + ASSERT_EQ(unshare(CLONE_NEWPID), 0) { + if (errno == EPERM) + SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN"); + else if (errno == EINVAL) + SKIP(return, "CLONE_NEWPID is invalid (missing CONFIG_PID_NS?)"); + } pid2 = fork(); ASSERT_GE(pid2, 0); @@ -3727,6 +3732,8 @@ TEST(user_notification_sibling_pid_ns) ASSERT_EQ(unshare(CLONE_NEWPID), 0) { if (errno == EPERM) SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN"); + else if (errno == EINVAL) + SKIP(return, "CLONE_NEWPID is invalid (missing CONFIG_PID_NS?)"); } ASSERT_EQ(errno, 0); From 471dbc547612adeaa769e48498ef591c6c95a57a Mon Sep 17 00:00:00 2001 From: Terry Tritton Date: Wed, 24 Jan 2024 14:13:56 +0000 Subject: [PATCH 2/5] selftests/seccomp: Change the syscall used in KILL_THREAD test The Bionic version of pthread_create used on Android calls the prctl function to give the stack and thread local storage a useful name. This will cause the KILL_THREAD test to fail as it will kill the thread as soon as it is created. change the test to use getpid instead of prctl. Signed-off-by: Terry Tritton Link: https://lore.kernel.org/r/20240124141357.1243457-3-terry.tritton@linaro.org Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 5e705674b706..da11b95b8872 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -784,7 +784,7 @@ void *kill_thread(void *data) bool die = (bool)data; if (die) { - prctl(PR_GET_SECCOMP, 0, 0, 0, 0); + syscall(__NR_getpid); return (void *)SIBLING_EXIT_FAILURE; } @@ -803,11 +803,11 @@ void kill_thread_or_group(struct __test_metadata *_metadata, { pthread_t thread; void *status; - /* Kill only when calling __NR_prctl. */ + /* Kill only when calling __NR_getpid. */ struct sock_filter filter_thread[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; @@ -819,7 +819,7 @@ void kill_thread_or_group(struct __test_metadata *_metadata, struct sock_filter filter_process[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), BPF_STMT(BPF_RET|BPF_K, kill), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; From 8e3c9f9f3a0742cd12b682a1766674253b33fcf0 Mon Sep 17 00:00:00 2001 From: Terry Tritton Date: Wed, 24 Jan 2024 14:13:57 +0000 Subject: [PATCH 3/5] selftests/seccomp: user_notification_addfd check nextfd is available Currently the user_notification_addfd test checks what the next expected file descriptor will be by incrementing a variable nextfd. This does not account for file descriptors that may already be open before the test is started and will cause the test to fail if any exist. Replace nextfd++ with a function get_next_fd which will check and return the next available file descriptor. Signed-off-by: Terry Tritton Link: https://lore.kernel.org/r/20240124141357.1243457-4-terry.tritton@linaro.org Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index da11b95b8872..cacf6507f690 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -4044,6 +4044,16 @@ TEST(user_notification_filter_empty_threaded) EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); } + +int get_next_fd(int prev_fd) +{ + for (int i = prev_fd + 1; i < FD_SETSIZE; ++i) { + if (fcntl(i, F_GETFD) == -1) + return i; + } + _exit(EXIT_FAILURE); +} + TEST(user_notification_addfd) { pid_t pid; @@ -4060,7 +4070,7 @@ TEST(user_notification_addfd) /* There may be arbitrary already-open fds at test start. */ memfd = memfd_create("test", 0); ASSERT_GE(memfd, 0); - nextfd = memfd + 1; + nextfd = get_next_fd(memfd); ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); ASSERT_EQ(0, ret) { @@ -4071,7 +4081,8 @@ TEST(user_notification_addfd) /* Check that the basic notification machinery works */ listener = user_notif_syscall(__NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER); - ASSERT_EQ(listener, nextfd++); + ASSERT_EQ(listener, nextfd); + nextfd = get_next_fd(nextfd); pid = fork(); ASSERT_GE(pid, 0); @@ -4126,14 +4137,16 @@ TEST(user_notification_addfd) /* Verify we can set an arbitrary remote fd */ fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); - EXPECT_EQ(fd, nextfd++); + EXPECT_EQ(fd, nextfd); + nextfd = get_next_fd(nextfd); EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); /* Verify we can set an arbitrary remote fd with large size */ memset(&big, 0x0, sizeof(big)); big.addfd = addfd; fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big); - EXPECT_EQ(fd, nextfd++); + EXPECT_EQ(fd, nextfd); + nextfd = get_next_fd(nextfd); /* Verify we can set a specific remote fd */ addfd.newfd = 42; @@ -4171,7 +4184,8 @@ TEST(user_notification_addfd) * Child has earlier "low" fds and now 42, so we expect the next * lowest available fd to be assigned here. */ - EXPECT_EQ(fd, nextfd++); + EXPECT_EQ(fd, nextfd); + nextfd = get_next_fd(nextfd); ASSERT_EQ(filecmp(getpid(), pid, memfd, fd), 0); /* From 55e68669b131401074513f903c097dae06ec6db1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 6 Feb 2024 01:53:19 -0800 Subject: [PATCH 4/5] selftests/seccomp: Pin benchmark to single CPU The seccomp benchmark test (for validating the benefit of bitmaps) can be sensitive to scheduling speed, so pin the process to a single CPU, which appears to significantly improve reliability, and loosen the "close enough" checking to allow up to 10% variance instead of 1%. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202402061002.3a8722fd-oliver.sang@intel.com Cc: Andy Lutomirski Cc: Will Drewry Reviewed-by: Mark Brown Signed-off-by: Kees Cook --- .../selftests/seccomp/seccomp_benchmark.c | 38 ++++++++++++++++++- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c index 5b5c9d558dee..9d7aa5a730e0 100644 --- a/tools/testing/selftests/seccomp/seccomp_benchmark.c +++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c @@ -4,7 +4,9 @@ */ #define _GNU_SOURCE #include +#include #include +#include #include #include #include @@ -76,8 +78,12 @@ unsigned long long calibrate(void) bool approx(int i_one, int i_two) { - double one = i_one, one_bump = one * 0.01; - double two = i_two, two_bump = two * 0.01; + /* + * This continues to be a noisy test. Instead of a 1% comparison + * go with 10%. + */ + double one = i_one, one_bump = one * 0.1; + double two = i_two, two_bump = two * 0.1; one_bump = one + MAX(one_bump, 2.0); two_bump = two + MAX(two_bump, 2.0); @@ -119,6 +125,32 @@ long compare(const char *name_one, const char *name_eval, const char *name_two, return good ? 0 : 1; } +/* Pin to a single CPU so the benchmark won't bounce around the system. */ +void affinity(void) +{ + long cpu; + ulong ncores = sysconf(_SC_NPROCESSORS_CONF); + cpu_set_t *setp = CPU_ALLOC(ncores); + ulong setsz = CPU_ALLOC_SIZE(ncores); + + /* + * Totally unscientific way to avoid CPUs that might be busier: + * choose the highest CPU instead of the lowest. + */ + for (cpu = ncores - 1; cpu >= 0; cpu--) { + CPU_ZERO_S(setsz, setp); + CPU_SET_S(cpu, setsz, setp); + if (sched_setaffinity(getpid(), setsz, setp) == -1) + continue; + printf("Pinned to CPU %lu of %lu\n", cpu + 1, ncores); + goto out; + } + fprintf(stderr, "Could not set CPU affinity -- calibration may not work well"); + +out: + CPU_FREE(setp); +} + int main(int argc, char *argv[]) { struct sock_filter bitmap_filter[] = { @@ -153,6 +185,8 @@ int main(int argc, char *argv[]) system("grep -H . /proc/sys/net/core/bpf_jit_enable"); system("grep -H . /proc/sys/net/core/bpf_jit_harden"); + affinity(); + if (argc > 1) samples = strtoull(argv[1], NULL, 0); else From 56af94aace8a0489fb1a32fd6f1cf0c548fe3911 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 12 Feb 2024 12:17:31 +0100 Subject: [PATCH 5/5] samples: user-trap: fix strict-aliasing warning I started getting warnings for this one file, though I can't see what changed since it was originally introduced in commit fec7b6690541 ("samples: add an example of seccomp user trap"). samples/seccomp/user-trap.c: In function 'send_fd': samples/seccomp/user-trap.c:50:11: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] 50 | *((int *)CMSG_DATA(cmsg)) = fd; | ~^~~~~~~~~~~~~~~~~~~~~~~ samples/seccomp/user-trap.c: In function 'recv_fd': samples/seccomp/user-trap.c:83:18: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] 83 | return *((int *)CMSG_DATA(cmsg)); | ~^~~~~~~~~~~~~~~~~~~~~~~ Using a temporary pointer variable avoids the warning. Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240212111737.917428-1-arnd@kernel.org Acked-by: Tycho Andersen Signed-off-by: Kees Cook --- samples/seccomp/user-trap.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/samples/seccomp/user-trap.c b/samples/seccomp/user-trap.c index 20291ec6489f..a23fec357b5d 100644 --- a/samples/seccomp/user-trap.c +++ b/samples/seccomp/user-trap.c @@ -33,6 +33,7 @@ static int send_fd(int sock, int fd) { struct msghdr msg = {}; struct cmsghdr *cmsg; + int *fd_ptr; char buf[CMSG_SPACE(sizeof(int))] = {0}, c = 'c'; struct iovec io = { .iov_base = &c, @@ -47,7 +48,8 @@ static int send_fd(int sock, int fd) cmsg->cmsg_level = SOL_SOCKET; cmsg->cmsg_type = SCM_RIGHTS; cmsg->cmsg_len = CMSG_LEN(sizeof(int)); - *((int *)CMSG_DATA(cmsg)) = fd; + fd_ptr = (int *)CMSG_DATA(cmsg); + *fd_ptr = fd; msg.msg_controllen = cmsg->cmsg_len; if (sendmsg(sock, &msg, 0) < 0) { @@ -62,6 +64,7 @@ static int recv_fd(int sock) { struct msghdr msg = {}; struct cmsghdr *cmsg; + int *fd_ptr; char buf[CMSG_SPACE(sizeof(int))] = {0}, c = 'c'; struct iovec io = { .iov_base = &c, @@ -79,8 +82,9 @@ static int recv_fd(int sock) } cmsg = CMSG_FIRSTHDR(&msg); + fd_ptr = (int *)CMSG_DATA(cmsg); - return *((int *)CMSG_DATA(cmsg)); + return *fd_ptr; } static int user_trap_syscall(int nr, unsigned int flags)