mirror of
https://github.com/torvalds/linux.git
synced 2024-12-11 05:33:09 +00:00
7a2fb5619c
This is a bug found when implementing pretty-printing for the landlock_add_rule system call, I decided to send this patch separately because this is a serious bug that should be fixed fast. I wrote a test program to do landlock_add_rule syscall in a loop, yet perf trace -e landlock_add_rule freezes, giving no output. This bug is introduced by the false understanding of the variable "key" below: ``` for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) { struct syscall *sc = trace__syscall_info(trace, NULL, key); ... } ``` The code above seems right at the beginning, but when looking at syscalltbl.c, I found these lines: ``` for (i = 0; i <= syscalltbl_native_max_id; ++i) if (syscalltbl_native[i]) ++nr_entries; entries = tbl->syscalls.entries = malloc(sizeof(struct syscall) * nr_entries); ... for (i = 0, j = 0; i <= syscalltbl_native_max_id; ++i) { if (syscalltbl_native[i]) { entries[j].name = syscalltbl_native[i]; entries[j].id = i; ++j; } } ``` meaning the key is merely an index to traverse the syscall table, instead of the actual syscall id for this particular syscall. So if one uses key to do trace__syscall_info(trace, NULL, key), because key only goes up to trace->sctbl->syscalls.nr_entries, for example, on my X86_64 machine, this number is 373, it will end up neglecting all the rest of the syscall, in my case, everything after `rseq`, because the traversal will stop at 373, and `rseq` is the last syscall whose id is lower than 373 in tools/perf/arch/x86/include/generated/asm/syscalls_64.c: ``` ... [334] = "rseq", [424] = "pidfd_send_signal", ... ``` The reason why the key is scrambled but perf trace works well is that key is used in trace__syscall_info(trace, NULL, key) to do trace->syscalls.table[id], this makes sure that the struct syscall returned actually has an id the same value as key, making the later bpf_prog matching all correct. 
After fixing this bug, I can do perf trace on 38 more syscalls, and because more syscalls are visible, we get 8 more syscalls that can be augmented. before: perf $ perf trace -vv --max-events=1 |& grep Reusing Reusing "open" BPF sys_enter augmenter for "stat" Reusing "open" BPF sys_enter augmenter for "lstat" Reusing "open" BPF sys_enter augmenter for "access" Reusing "connect" BPF sys_enter augmenter for "accept" Reusing "sendto" BPF sys_enter augmenter for "recvfrom" Reusing "connect" BPF sys_enter augmenter for "bind" Reusing "connect" BPF sys_enter augmenter for "getsockname" Reusing "connect" BPF sys_enter augmenter for "getpeername" Reusing "open" BPF sys_enter augmenter for "execve" Reusing "open" BPF sys_enter augmenter for "truncate" Reusing "open" BPF sys_enter augmenter for "chdir" Reusing "open" BPF sys_enter augmenter for "mkdir" Reusing "open" BPF sys_enter augmenter for "rmdir" Reusing "open" BPF sys_enter augmenter for "creat" Reusing "open" BPF sys_enter augmenter for "link" Reusing "open" BPF sys_enter augmenter for "unlink" Reusing "open" BPF sys_enter augmenter for "symlink" Reusing "open" BPF sys_enter augmenter for "readlink" Reusing "open" BPF sys_enter augmenter for "chmod" Reusing "open" BPF sys_enter augmenter for "chown" Reusing "open" BPF sys_enter augmenter for "lchown" Reusing "open" BPF sys_enter augmenter for "mknod" Reusing "open" BPF sys_enter augmenter for "statfs" Reusing "open" BPF sys_enter augmenter for "pivot_root" Reusing "open" BPF sys_enter augmenter for "chroot" Reusing "open" BPF sys_enter augmenter for "acct" Reusing "open" BPF sys_enter augmenter for "swapon" Reusing "open" BPF sys_enter augmenter for "swapoff" Reusing "open" BPF sys_enter augmenter for "delete_module" Reusing "open" BPF sys_enter augmenter for "setxattr" Reusing "open" BPF sys_enter augmenter for "lsetxattr" Reusing "openat" BPF sys_enter augmenter for "fsetxattr" Reusing "open" BPF sys_enter augmenter for "getxattr" Reusing "open" BPF sys_enter 
augmenter for "lgetxattr" Reusing "openat" BPF sys_enter augmenter for "fgetxattr" Reusing "open" BPF sys_enter augmenter for "listxattr" Reusing "open" BPF sys_enter augmenter for "llistxattr" Reusing "open" BPF sys_enter augmenter for "removexattr" Reusing "open" BPF sys_enter augmenter for "lremovexattr" Reusing "fsetxattr" BPF sys_enter augmenter for "fremovexattr" Reusing "open" BPF sys_enter augmenter for "mq_open" Reusing "open" BPF sys_enter augmenter for "mq_unlink" Reusing "fsetxattr" BPF sys_enter augmenter for "add_key" Reusing "fremovexattr" BPF sys_enter augmenter for "request_key" Reusing "fremovexattr" BPF sys_enter augmenter for "inotify_add_watch" Reusing "fremovexattr" BPF sys_enter augmenter for "mkdirat" Reusing "fremovexattr" BPF sys_enter augmenter for "mknodat" Reusing "fremovexattr" BPF sys_enter augmenter for "fchownat" Reusing "fremovexattr" BPF sys_enter augmenter for "futimesat" Reusing "fremovexattr" BPF sys_enter augmenter for "newfstatat" Reusing "fremovexattr" BPF sys_enter augmenter for "unlinkat" Reusing "fremovexattr" BPF sys_enter augmenter for "linkat" Reusing "open" BPF sys_enter augmenter for "symlinkat" Reusing "fremovexattr" BPF sys_enter augmenter for "readlinkat" Reusing "fremovexattr" BPF sys_enter augmenter for "fchmodat" Reusing "fremovexattr" BPF sys_enter augmenter for "faccessat" Reusing "fremovexattr" BPF sys_enter augmenter for "utimensat" Reusing "connect" BPF sys_enter augmenter for "accept4" Reusing "fremovexattr" BPF sys_enter augmenter for "name_to_handle_at" Reusing "fremovexattr" BPF sys_enter augmenter for "renameat2" Reusing "open" BPF sys_enter augmenter for "memfd_create" Reusing "fremovexattr" BPF sys_enter augmenter for "execveat" Reusing "fremovexattr" BPF sys_enter augmenter for "statx" after perf $ perf trace -vv --max-events=1 |& grep Reusing Reusing "open" BPF sys_enter augmenter for "stat" Reusing "open" BPF sys_enter augmenter for "lstat" Reusing "open" BPF sys_enter augmenter for "access" 
Reusing "connect" BPF sys_enter augmenter for "accept" Reusing "sendto" BPF sys_enter augmenter for "recvfrom" Reusing "connect" BPF sys_enter augmenter for "bind" Reusing "connect" BPF sys_enter augmenter for "getsockname" Reusing "connect" BPF sys_enter augmenter for "getpeername" Reusing "open" BPF sys_enter augmenter for "execve" Reusing "open" BPF sys_enter augmenter for "truncate" Reusing "open" BPF sys_enter augmenter for "chdir" Reusing "open" BPF sys_enter augmenter for "mkdir" Reusing "open" BPF sys_enter augmenter for "rmdir" Reusing "open" BPF sys_enter augmenter for "creat" Reusing "open" BPF sys_enter augmenter for "link" Reusing "open" BPF sys_enter augmenter for "unlink" Reusing "open" BPF sys_enter augmenter for "symlink" Reusing "open" BPF sys_enter augmenter for "readlink" Reusing "open" BPF sys_enter augmenter for "chmod" Reusing "open" BPF sys_enter augmenter for "chown" Reusing "open" BPF sys_enter augmenter for "lchown" Reusing "open" BPF sys_enter augmenter for "mknod" Reusing "open" BPF sys_enter augmenter for "statfs" Reusing "open" BPF sys_enter augmenter for "pivot_root" Reusing "open" BPF sys_enter augmenter for "chroot" Reusing "open" BPF sys_enter augmenter for "acct" Reusing "open" BPF sys_enter augmenter for "swapon" Reusing "open" BPF sys_enter augmenter for "swapoff" Reusing "open" BPF sys_enter augmenter for "delete_module" Reusing "open" BPF sys_enter augmenter for "setxattr" Reusing "open" BPF sys_enter augmenter for "lsetxattr" Reusing "openat" BPF sys_enter augmenter for "fsetxattr" Reusing "open" BPF sys_enter augmenter for "getxattr" Reusing "open" BPF sys_enter augmenter for "lgetxattr" Reusing "openat" BPF sys_enter augmenter for "fgetxattr" Reusing "open" BPF sys_enter augmenter for "listxattr" Reusing "open" BPF sys_enter augmenter for "llistxattr" Reusing "open" BPF sys_enter augmenter for "removexattr" Reusing "open" BPF sys_enter augmenter for "lremovexattr" Reusing "fsetxattr" BPF sys_enter augmenter for 
"fremovexattr" Reusing "open" BPF sys_enter augmenter for "mq_open" Reusing "open" BPF sys_enter augmenter for "mq_unlink" Reusing "fsetxattr" BPF sys_enter augmenter for "add_key" Reusing "fremovexattr" BPF sys_enter augmenter for "request_key" Reusing "fremovexattr" BPF sys_enter augmenter for "inotify_add_watch" Reusing "fremovexattr" BPF sys_enter augmenter for "mkdirat" Reusing "fremovexattr" BPF sys_enter augmenter for "mknodat" Reusing "fremovexattr" BPF sys_enter augmenter for "fchownat" Reusing "fremovexattr" BPF sys_enter augmenter for "futimesat" Reusing "fremovexattr" BPF sys_enter augmenter for "newfstatat" Reusing "fremovexattr" BPF sys_enter augmenter for "unlinkat" Reusing "fremovexattr" BPF sys_enter augmenter for "linkat" Reusing "open" BPF sys_enter augmenter for "symlinkat" Reusing "fremovexattr" BPF sys_enter augmenter for "readlinkat" Reusing "fremovexattr" BPF sys_enter augmenter for "fchmodat" Reusing "fremovexattr" BPF sys_enter augmenter for "faccessat" Reusing "fremovexattr" BPF sys_enter augmenter for "utimensat" Reusing "connect" BPF sys_enter augmenter for "accept4" Reusing "fremovexattr" BPF sys_enter augmenter for "name_to_handle_at" Reusing "fremovexattr" BPF sys_enter augmenter for "renameat2" Reusing "open" BPF sys_enter augmenter for "memfd_create" Reusing "fremovexattr" BPF sys_enter augmenter for "execveat" Reusing "fremovexattr" BPF sys_enter augmenter for "statx" TL;DR: These are the new syscalls that can be augmented Reusing "openat" BPF sys_enter augmenter for "open_tree" Reusing "openat" BPF sys_enter augmenter for "openat2" Reusing "openat" BPF sys_enter augmenter for "mount_setattr" Reusing "openat" BPF sys_enter augmenter for "move_mount" Reusing "open" BPF sys_enter augmenter for "fsopen" Reusing "openat" BPF sys_enter augmenter for "fspick" Reusing "openat" BPF sys_enter augmenter for "faccessat2" Reusing "openat" BPF sys_enter augmenter for "fchmodat2" as for the perf trace output: before perf $ perf trace -e 
faccessat2 --max-events=1 [no output] after perf $ ./perf trace -e faccessat2 --max-events=1 0.000 ( 0.037 ms): waybar/958 faccessat2(dfd: 40, filename: "uevent") = 0 P.S. The reason why this bug was not found in the past five years is probably because it only happens to the newer syscalls whose id is greater, for instance, faccessat2 of id 439, which not a lot of people care about when using perf trace. [Arnaldo]: notes That and the fact that the BPF code was hidden before having to use -e, that got changed kinda recently when we switched to using BPF skels for augmenting syscalls in 'perf trace':
⬢[acme@toolbox perf-tools-next]$ git log --oneline tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
a9f4c6c999 perf trace: Collect sys_nanosleep first argument
29d16de26d perf augmented_raw_syscalls.bpf: Move 'struct timespec64' to vmlinux.h
5069211e2f perf trace: Use the right bpf_probe_read(_str) variant for reading user data
33b725ce7b perf trace: Avoid compile error wrt redefining bool
7d9642311b perf bpf augmented_raw_syscalls: Add an assert to make sure sizeof(augmented_arg->value) is a power of two.
262b54b6c9 perf bpf augmented_raw_syscalls: Add an assert to make sure sizeof(saddr) is a power of two.
1836480429 perf bpf_skel augmented_raw_syscalls: Cap the socklen parameter using &= sizeof(saddr)
cd2cece61a perf trace: Tidy comments related to BPF + syscall augmentation
5e6da6be30 perf trace: Migrate BPF augmentation to use a skeleton
⬢[acme@toolbox perf-tools-next]$
⬢[acme@toolbox perf-tools-next]$ git show --oneline --pretty=reference 5e6da6be30 | head -1
5e6da6be30 (perf trace: Migrate BPF augmentation to use a skeleton, 2023-08-10)
⬢[acme@toolbox perf-tools-next]$
I.e. from August, 2023. One had as well to ask for BUILD_BPF_SKEL=1, which now is default if all it needs is available on the system. I simplified the code to not expose the 'struct syscall' outside of tools/perf/util/syscalltbl.c, instead providing a function to go from the index to the syscall id: int syscalltbl__id_at_idx(struct syscalltbl *tbl, int idx); Signed-off-by: Howard Chu <howardchu95@gmail.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Link: https://lore.kernel.org/lkml/ZmhlAxbVcAKoPTg8@x1 Link: https://lore.kernel.org/r/20240705132059.853205-2-howardchu95@gmail.com Signed-off-by: Namhyung Kim <namhyung@kernel.org>
192 lines
4.8 KiB
C
192 lines
4.8 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* System call table mapper
|
|
*
|
|
* (C) 2016 Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
*/
|
|
|
|
#include "syscalltbl.h"
|
|
#include <stdlib.h>
|
|
#include <linux/compiler.h>
|
|
#include <linux/zalloc.h>
|
|
|
|
#ifdef HAVE_SYSCALL_TABLE_SUPPORT
|
|
#include <string.h>
|
|
#include "string2.h"
|
|
|
|
/*
 * Pick the syscall name table for the architecture being compiled for.
 *
 * Every branch includes that architecture's table source and then publishes
 * it under two arch-neutral names used by the rest of this file:
 *   syscalltbl_native_max_id - the table's *_MAX_ID constant
 *   syscalltbl_native        - the name array, indexed by syscall number
 */
#if defined(__x86_64__)
#include <asm/syscalls_64.c>
const int syscalltbl_native_max_id = SYSCALLTBL_x86_64_MAX_ID;
static const char *const *syscalltbl_native = syscalltbl_x86_64;
#elif defined(__s390x__)
#include <asm/syscalls_64.c>
const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID;
static const char *const *syscalltbl_native = syscalltbl_s390_64;
#elif defined(__powerpc64__)
#include <asm/syscalls_64.c>
const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_64_MAX_ID;
static const char *const *syscalltbl_native = syscalltbl_powerpc_64;
#elif defined(__powerpc__)
#include <asm/syscalls_32.c>
const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_32_MAX_ID;
static const char *const *syscalltbl_native = syscalltbl_powerpc_32;
#elif defined(__aarch64__)
#include <asm/syscalls.c>
const int syscalltbl_native_max_id = SYSCALLTBL_ARM64_MAX_ID;
static const char *const *syscalltbl_native = syscalltbl_arm64;
#elif defined(__mips__)
#include <asm/syscalls_n64.c>
const int syscalltbl_native_max_id = SYSCALLTBL_MIPS_N64_MAX_ID;
static const char *const *syscalltbl_native = syscalltbl_mips_n64;
#elif defined(__loongarch__)
#include <asm/syscalls.c>
const int syscalltbl_native_max_id = SYSCALLTBL_LOONGARCH_MAX_ID;
static const char *const *syscalltbl_native = syscalltbl_loongarch;
#endif
|
|
|
|
/*
 * One element of the name-sorted lookup array built by
 * syscalltbl__init_native().
 */
struct syscall {
	int id;			/* index of this name in syscalltbl_native[] */
	const char *name;	/* pointer taken from syscalltbl_native[], not copied */
};
|
|
|
|
static int syscallcmpname(const void *vkey, const void *ventry)
|
|
{
|
|
const char *key = vkey;
|
|
const struct syscall *entry = ventry;
|
|
|
|
return strcmp(key, entry->name);
|
|
}
|
|
|
|
static int syscallcmp(const void *va, const void *vb)
|
|
{
|
|
const struct syscall *a = va, *b = vb;
|
|
|
|
return strcmp(a->name, b->name);
|
|
}
|
|
|
|
static int syscalltbl__init_native(struct syscalltbl *tbl)
|
|
{
|
|
int nr_entries = 0, i, j;
|
|
struct syscall *entries;
|
|
|
|
for (i = 0; i <= syscalltbl_native_max_id; ++i)
|
|
if (syscalltbl_native[i])
|
|
++nr_entries;
|
|
|
|
entries = tbl->syscalls.entries = malloc(sizeof(struct syscall) * nr_entries);
|
|
if (tbl->syscalls.entries == NULL)
|
|
return -1;
|
|
|
|
for (i = 0, j = 0; i <= syscalltbl_native_max_id; ++i) {
|
|
if (syscalltbl_native[i]) {
|
|
entries[j].name = syscalltbl_native[i];
|
|
entries[j].id = i;
|
|
++j;
|
|
}
|
|
}
|
|
|
|
qsort(tbl->syscalls.entries, nr_entries, sizeof(struct syscall), syscallcmp);
|
|
tbl->syscalls.nr_entries = nr_entries;
|
|
tbl->syscalls.max_id = syscalltbl_native_max_id;
|
|
return 0;
|
|
}
|
|
|
|
struct syscalltbl *syscalltbl__new(void)
|
|
{
|
|
struct syscalltbl *tbl = malloc(sizeof(*tbl));
|
|
if (tbl) {
|
|
if (syscalltbl__init_native(tbl)) {
|
|
free(tbl);
|
|
return NULL;
|
|
}
|
|
}
|
|
return tbl;
|
|
}
|
|
|
|
void syscalltbl__delete(struct syscalltbl *tbl)
|
|
{
|
|
zfree(&tbl->syscalls.entries);
|
|
free(tbl);
|
|
}
|
|
|
|
/*
 * Map a syscall number to its name.
 *
 * @tbl: unused in this flavour, the lookup goes straight to syscalltbl_native[]
 * @id:  syscall number to look up
 *
 * Returns the native table's entry for @id (NULL for a slot that has no
 * name), or NULL when @id lies outside [0, syscalltbl_native_max_id].
 */
const char *syscalltbl__name(const struct syscalltbl *tbl __maybe_unused, int id)
{
	/* A negative id would read before the start of the table. */
	if (id < 0 || id > syscalltbl_native_max_id)
		return NULL;

	return syscalltbl_native[id];
}
|
|
|
|
int syscalltbl__id(struct syscalltbl *tbl, const char *name)
|
|
{
|
|
struct syscall *sc = bsearch(name, tbl->syscalls.entries,
|
|
tbl->syscalls.nr_entries, sizeof(*sc),
|
|
syscallcmpname);
|
|
|
|
return sc ? sc->id : -1;
|
|
}
|
|
|
|
int syscalltbl__id_at_idx(struct syscalltbl *tbl, int idx)
|
|
{
|
|
struct syscall *syscalls = tbl->syscalls.entries;
|
|
|
|
return idx < tbl->syscalls.nr_entries ? syscalls[idx].id : -1;
|
|
}
|
|
|
|
int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
|
|
{
|
|
int i;
|
|
struct syscall *syscalls = tbl->syscalls.entries;
|
|
|
|
for (i = *idx + 1; i < tbl->syscalls.nr_entries; ++i) {
|
|
if (strglobmatch(syscalls[i].name, syscall_glob)) {
|
|
*idx = i;
|
|
return syscalls[i].id;
|
|
}
|
|
}
|
|
|
|
return -1;
|
|
}
|
|
|
|
/*
 * Start a glob scan over the table: find the first syscall whose name
 * matches @syscall_glob.
 *
 * @idx is reset here and then updated as by syscalltbl__strglobmatch_next(),
 * whose return value is passed through unchanged.
 */
int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
{
	/* The scan resumes at *idx + 1, so -1 makes it begin at entry 0. */
	*idx = -1;

	return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
}
|
|
|
|
#else /* HAVE_SYSCALL_TABLE_SUPPORT */
|
|
|
|
#include <libaudit.h>
|
|
|
|
struct syscalltbl *syscalltbl__new(void)
|
|
{
|
|
struct syscalltbl *tbl = zalloc(sizeof(*tbl));
|
|
if (tbl)
|
|
tbl->audit_machine = audit_detect_machine();
|
|
return tbl;
|
|
}
|
|
|
|
/*
 * libaudit flavour: the table owns no other allocations, so releasing the
 * struct itself is all that is needed.
 */
void syscalltbl__delete(struct syscalltbl *tbl)
{
	free(tbl);
}
|
|
|
|
const char *syscalltbl__name(const struct syscalltbl *tbl, int id)
|
|
{
|
|
return audit_syscall_to_name(id, tbl->audit_machine);
|
|
}
|
|
|
|
int syscalltbl__id(struct syscalltbl *tbl, const char *name)
|
|
{
|
|
return audit_name_to_syscall(name, tbl->audit_machine);
|
|
}
|
|
|
|
/*
 * libaudit flavour: stub. None of the arguments are used and it always
 * reports "no match" by returning -1.
 */
int syscalltbl__strglobmatch_next(struct syscalltbl *tbl __maybe_unused,
				  const char *syscall_glob __maybe_unused,
				  int *idx __maybe_unused)
{
	return -1;
}
|
|
|
|
/*
 * libaudit flavour: hands its arguments to syscalltbl__strglobmatch_next()
 * and returns that function's result. Unlike the native flavour, *idx is not
 * written here.
 */
int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
{
	return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
}
|
|
#endif /* HAVE_SYSCALL_TABLE_SUPPORT */
|