mirror of
https://github.com/torvalds/linux.git
synced 2024-11-25 13:41:51 +00:00
audit,io_uring,io-wq: add some basic audit support to io_uring
This patch adds basic auditing to io_uring operations, regardless of their context. This is accomplished by allocating audit_context structures for the io-wq worker and io_uring SQPOLL kernel threads as well as explicitly auditing the io_uring operations in io_issue_sqe(). Individual io_uring operations can bypass auditing through the "audit_skip" field in the struct io_op_def definition for the operation; although great care must be taken so that security relevant io_uring operations do not bypass auditing; please contact the audit mailing list (see the MAINTAINERS file) with any questions. The io_uring operations are audited using a new AUDIT_URINGOP record, an example is shown below: type=UNKNOWN[1336] msg=audit(1631800225.981:37289): uring_op=19 success=yes exit=0 items=0 ppid=15454 pid=15681 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key=(null) Thanks to Richard Guy Briggs for review and feedback. Signed-off-by: Paul Moore <paul@paul-moore.com>
This commit is contained in:
parent
12c5e81d3f
commit
5bd2182d58
@ -14,6 +14,7 @@
|
||||
#include <linux/rculist_nulls.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/tracehook.h>
|
||||
#include <linux/audit.h>
|
||||
|
||||
#include "io-wq.h"
|
||||
|
||||
@ -562,6 +563,8 @@ static int io_wqe_worker(void *data)
|
||||
snprintf(buf, sizeof(buf), "iou-wrk-%d", wq->task->pid);
|
||||
set_task_comm(current, buf);
|
||||
|
||||
audit_alloc_kernel(current);
|
||||
|
||||
while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
|
||||
long ret;
|
||||
|
||||
@ -601,6 +604,7 @@ loop:
|
||||
io_worker_handle_work(worker);
|
||||
}
|
||||
|
||||
audit_free(current);
|
||||
io_worker_exit(worker);
|
||||
return 0;
|
||||
}
|
||||
|
@ -79,6 +79,7 @@
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/io_uring.h>
|
||||
#include <linux/tracehook.h>
|
||||
#include <linux/audit.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/io_uring.h>
|
||||
@ -917,6 +918,8 @@ struct io_op_def {
|
||||
unsigned needs_async_setup : 1;
|
||||
/* should block plug */
|
||||
unsigned plug : 1;
|
||||
/* skip auditing */
|
||||
unsigned audit_skip : 1;
|
||||
/* size of async data needed, if any */
|
||||
unsigned short async_size;
|
||||
};
|
||||
@ -930,6 +933,7 @@ static const struct io_op_def io_op_defs[] = {
|
||||
.buffer_select = 1,
|
||||
.needs_async_setup = 1,
|
||||
.plug = 1,
|
||||
.audit_skip = 1,
|
||||
.async_size = sizeof(struct io_async_rw),
|
||||
},
|
||||
[IORING_OP_WRITEV] = {
|
||||
@ -939,16 +943,19 @@ static const struct io_op_def io_op_defs[] = {
|
||||
.pollout = 1,
|
||||
.needs_async_setup = 1,
|
||||
.plug = 1,
|
||||
.audit_skip = 1,
|
||||
.async_size = sizeof(struct io_async_rw),
|
||||
},
|
||||
[IORING_OP_FSYNC] = {
|
||||
.needs_file = 1,
|
||||
.audit_skip = 1,
|
||||
},
|
||||
[IORING_OP_READ_FIXED] = {
|
||||
.needs_file = 1,
|
||||
.unbound_nonreg_file = 1,
|
||||
.pollin = 1,
|
||||
.plug = 1,
|
||||
.audit_skip = 1,
|
||||
.async_size = sizeof(struct io_async_rw),
|
||||
},
|
||||
[IORING_OP_WRITE_FIXED] = {
|
||||
@ -957,15 +964,20 @@ static const struct io_op_def io_op_defs[] = {
|
||||
.unbound_nonreg_file = 1,
|
||||
.pollout = 1,
|
||||
.plug = 1,
|
||||
.audit_skip = 1,
|
||||
.async_size = sizeof(struct io_async_rw),
|
||||
},
|
||||
[IORING_OP_POLL_ADD] = {
|
||||
.needs_file = 1,
|
||||
.unbound_nonreg_file = 1,
|
||||
.audit_skip = 1,
|
||||
},
|
||||
[IORING_OP_POLL_REMOVE] = {
|
||||
.audit_skip = 1,
|
||||
},
|
||||
[IORING_OP_POLL_REMOVE] = {},
|
||||
[IORING_OP_SYNC_FILE_RANGE] = {
|
||||
.needs_file = 1,
|
||||
.audit_skip = 1,
|
||||
},
|
||||
[IORING_OP_SENDMSG] = {
|
||||
.needs_file = 1,
|
||||
@ -983,18 +995,23 @@ static const struct io_op_def io_op_defs[] = {
|
||||
.async_size = sizeof(struct io_async_msghdr),
|
||||
},
|
||||
[IORING_OP_TIMEOUT] = {
|
||||
.audit_skip = 1,
|
||||
.async_size = sizeof(struct io_timeout_data),
|
||||
},
|
||||
[IORING_OP_TIMEOUT_REMOVE] = {
|
||||
/* used by timeout updates' prep() */
|
||||
.audit_skip = 1,
|
||||
},
|
||||
[IORING_OP_ACCEPT] = {
|
||||
.needs_file = 1,
|
||||
.unbound_nonreg_file = 1,
|
||||
.pollin = 1,
|
||||
},
|
||||
[IORING_OP_ASYNC_CANCEL] = {},
|
||||
[IORING_OP_ASYNC_CANCEL] = {
|
||||
.audit_skip = 1,
|
||||
},
|
||||
[IORING_OP_LINK_TIMEOUT] = {
|
||||
.audit_skip = 1,
|
||||
.async_size = sizeof(struct io_timeout_data),
|
||||
},
|
||||
[IORING_OP_CONNECT] = {
|
||||
@ -1009,14 +1026,19 @@ static const struct io_op_def io_op_defs[] = {
|
||||
},
|
||||
[IORING_OP_OPENAT] = {},
|
||||
[IORING_OP_CLOSE] = {},
|
||||
[IORING_OP_FILES_UPDATE] = {},
|
||||
[IORING_OP_STATX] = {},
|
||||
[IORING_OP_FILES_UPDATE] = {
|
||||
.audit_skip = 1,
|
||||
},
|
||||
[IORING_OP_STATX] = {
|
||||
.audit_skip = 1,
|
||||
},
|
||||
[IORING_OP_READ] = {
|
||||
.needs_file = 1,
|
||||
.unbound_nonreg_file = 1,
|
||||
.pollin = 1,
|
||||
.buffer_select = 1,
|
||||
.plug = 1,
|
||||
.audit_skip = 1,
|
||||
.async_size = sizeof(struct io_async_rw),
|
||||
},
|
||||
[IORING_OP_WRITE] = {
|
||||
@ -1025,39 +1047,50 @@ static const struct io_op_def io_op_defs[] = {
|
||||
.unbound_nonreg_file = 1,
|
||||
.pollout = 1,
|
||||
.plug = 1,
|
||||
.audit_skip = 1,
|
||||
.async_size = sizeof(struct io_async_rw),
|
||||
},
|
||||
[IORING_OP_FADVISE] = {
|
||||
.needs_file = 1,
|
||||
.audit_skip = 1,
|
||||
},
|
||||
[IORING_OP_MADVISE] = {},
|
||||
[IORING_OP_SEND] = {
|
||||
.needs_file = 1,
|
||||
.unbound_nonreg_file = 1,
|
||||
.pollout = 1,
|
||||
.audit_skip = 1,
|
||||
},
|
||||
[IORING_OP_RECV] = {
|
||||
.needs_file = 1,
|
||||
.unbound_nonreg_file = 1,
|
||||
.pollin = 1,
|
||||
.buffer_select = 1,
|
||||
.audit_skip = 1,
|
||||
},
|
||||
[IORING_OP_OPENAT2] = {
|
||||
},
|
||||
[IORING_OP_EPOLL_CTL] = {
|
||||
.unbound_nonreg_file = 1,
|
||||
.audit_skip = 1,
|
||||
},
|
||||
[IORING_OP_SPLICE] = {
|
||||
.needs_file = 1,
|
||||
.hash_reg_file = 1,
|
||||
.unbound_nonreg_file = 1,
|
||||
.audit_skip = 1,
|
||||
},
|
||||
[IORING_OP_PROVIDE_BUFFERS] = {
|
||||
.audit_skip = 1,
|
||||
},
|
||||
[IORING_OP_REMOVE_BUFFERS] = {
|
||||
.audit_skip = 1,
|
||||
},
|
||||
[IORING_OP_PROVIDE_BUFFERS] = {},
|
||||
[IORING_OP_REMOVE_BUFFERS] = {},
|
||||
[IORING_OP_TEE] = {
|
||||
.needs_file = 1,
|
||||
.hash_reg_file = 1,
|
||||
.unbound_nonreg_file = 1,
|
||||
.audit_skip = 1,
|
||||
},
|
||||
[IORING_OP_SHUTDOWN] = {
|
||||
.needs_file = 1,
|
||||
@ -6591,6 +6624,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
|
||||
if ((req->flags & REQ_F_CREDS) && req->creds != current_cred())
|
||||
creds = override_creds(req->creds);
|
||||
|
||||
if (!io_op_defs[req->opcode].audit_skip)
|
||||
audit_uring_entry(req->opcode);
|
||||
|
||||
switch (req->opcode) {
|
||||
case IORING_OP_NOP:
|
||||
ret = io_nop(req, issue_flags);
|
||||
@ -6706,6 +6742,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
|
||||
break;
|
||||
}
|
||||
|
||||
if (!io_op_defs[req->opcode].audit_skip)
|
||||
audit_uring_exit(!ret, ret);
|
||||
|
||||
if (creds)
|
||||
revert_creds(creds);
|
||||
if (ret)
|
||||
@ -7360,6 +7399,8 @@ static int io_sq_thread(void *data)
|
||||
set_cpus_allowed_ptr(current, cpu_online_mask);
|
||||
current->flags |= PF_NO_SETAFFINITY;
|
||||
|
||||
audit_alloc_kernel(current);
|
||||
|
||||
mutex_lock(&sqd->lock);
|
||||
while (1) {
|
||||
bool cap_entries, sqt_spin = false;
|
||||
@ -7425,6 +7466,8 @@ static int io_sq_thread(void *data)
|
||||
io_run_task_work();
|
||||
mutex_unlock(&sqd->lock);
|
||||
|
||||
audit_free(current);
|
||||
|
||||
complete(&sqd->exited);
|
||||
do_exit(0);
|
||||
}
|
||||
|
@ -286,7 +286,10 @@ static inline int audit_signal_info(int sig, struct task_struct *t)
|
||||
/* These are defined in auditsc.c */
|
||||
/* Public API */
|
||||
extern int audit_alloc(struct task_struct *task);
|
||||
extern int audit_alloc_kernel(struct task_struct *task);
|
||||
extern void __audit_free(struct task_struct *task);
|
||||
extern void __audit_uring_entry(u8 op);
|
||||
extern void __audit_uring_exit(int success, long code);
|
||||
extern void __audit_syscall_entry(int major, unsigned long a0, unsigned long a1,
|
||||
unsigned long a2, unsigned long a3);
|
||||
extern void __audit_syscall_exit(int ret_success, long ret_value);
|
||||
@ -323,6 +326,21 @@ static inline void audit_free(struct task_struct *task)
|
||||
if (unlikely(task->audit_context))
|
||||
__audit_free(task);
|
||||
}
|
||||
static inline void audit_uring_entry(u8 op)
|
||||
{
|
||||
/*
|
||||
* We intentionally check audit_context() before audit_enabled as most
|
||||
* Linux systems (as of ~2021) rely on systemd which forces audit to
|
||||
* be enabled regardless of the user's audit configuration.
|
||||
*/
|
||||
if (unlikely(audit_context() && audit_enabled))
|
||||
__audit_uring_entry(op);
|
||||
}
|
||||
static inline void audit_uring_exit(int success, long code)
|
||||
{
|
||||
if (unlikely(!audit_dummy_context()))
|
||||
__audit_uring_exit(success, code);
|
||||
}
|
||||
static inline void audit_syscall_entry(int major, unsigned long a0,
|
||||
unsigned long a1, unsigned long a2,
|
||||
unsigned long a3)
|
||||
@ -554,8 +572,16 @@ static inline int audit_alloc(struct task_struct *task)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline int audit_alloc_kernel(struct task_struct *task)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline void audit_free(struct task_struct *task)
|
||||
{ }
|
||||
static inline void audit_uring_entry(u8 op)
|
||||
{ }
|
||||
static inline void audit_uring_exit(int success, long code)
|
||||
{ }
|
||||
static inline void audit_syscall_entry(int major, unsigned long a0,
|
||||
unsigned long a1, unsigned long a2,
|
||||
unsigned long a3)
|
||||
|
@ -118,6 +118,7 @@
|
||||
#define AUDIT_TIME_ADJNTPVAL 1333 /* NTP value adjustment */
|
||||
#define AUDIT_BPF 1334 /* BPF subsystem */
|
||||
#define AUDIT_EVENT_LISTENER 1335 /* Task joined multicast read socket */
|
||||
#define AUDIT_URINGOP 1336 /* io_uring operation */
|
||||
|
||||
#define AUDIT_AVC 1400 /* SE Linux avc denial or grant */
|
||||
#define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */
|
||||
|
@ -103,10 +103,12 @@ struct audit_context {
|
||||
enum {
|
||||
AUDIT_CTX_UNUSED, /* audit_context is currently unused */
|
||||
AUDIT_CTX_SYSCALL, /* in use by syscall */
|
||||
AUDIT_CTX_URING, /* in use by io_uring */
|
||||
} context;
|
||||
enum audit_state state, current_state;
|
||||
unsigned int serial; /* serial number for record */
|
||||
int major; /* syscall number */
|
||||
int uring_op; /* uring operation */
|
||||
struct timespec64 ctime; /* time of syscall entry */
|
||||
unsigned long argv[4]; /* syscall arguments */
|
||||
long return_code;/* syscall return code */
|
||||
|
166
kernel/auditsc.c
166
kernel/auditsc.c
@ -959,6 +959,7 @@ static void audit_reset_context(struct audit_context *ctx)
|
||||
ctx->current_state = ctx->state;
|
||||
ctx->serial = 0;
|
||||
ctx->major = 0;
|
||||
ctx->uring_op = 0;
|
||||
ctx->ctime = (struct timespec64){ .tv_sec = 0, .tv_nsec = 0 };
|
||||
memset(ctx->argv, 0, sizeof(ctx->argv));
|
||||
ctx->return_code = 0;
|
||||
@ -1044,6 +1045,31 @@ int audit_alloc(struct task_struct *tsk)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* audit_alloc_kernel - allocate an audit_context for a kernel task
|
||||
* @tsk: the kernel task
|
||||
*
|
||||
* Similar to the audit_alloc() function, but intended for kernel private
|
||||
* threads. Returns zero on success, negative values on failure.
|
||||
*/
|
||||
int audit_alloc_kernel(struct task_struct *tsk)
|
||||
{
|
||||
/*
|
||||
* At the moment we are just going to call into audit_alloc() to
|
||||
* simplify the code, but there two things to keep in mind with this
|
||||
* approach:
|
||||
*
|
||||
* 1. Filtering internal kernel tasks is a bit laughable in almost all
|
||||
* cases, but there is at least one case where there is a benefit:
|
||||
* the '-a task,never' case allows the admin to effectively disable
|
||||
* task auditing at runtime.
|
||||
*
|
||||
* 2. The {set,clear}_task_syscall_work() ops likely have zero effect
|
||||
* on these internal kernel tasks, but they probably don't hurt either.
|
||||
*/
|
||||
return audit_alloc(tsk);
|
||||
}
|
||||
|
||||
static inline void audit_free_context(struct audit_context *context)
|
||||
{
|
||||
/* resetting is extra work, but it is likely just noise */
|
||||
@ -1546,6 +1572,44 @@ out:
|
||||
audit_log_end(ab);
|
||||
}
|
||||
|
||||
/**
|
||||
* audit_log_uring - generate a AUDIT_URINGOP record
|
||||
* @ctx: the audit context
|
||||
*/
|
||||
static void audit_log_uring(struct audit_context *ctx)
|
||||
{
|
||||
struct audit_buffer *ab;
|
||||
const struct cred *cred;
|
||||
|
||||
ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_URINGOP);
|
||||
if (!ab)
|
||||
return;
|
||||
cred = current_cred();
|
||||
audit_log_format(ab, "uring_op=%d", ctx->uring_op);
|
||||
if (ctx->return_valid != AUDITSC_INVALID)
|
||||
audit_log_format(ab, " success=%s exit=%ld",
|
||||
(ctx->return_valid == AUDITSC_SUCCESS ?
|
||||
"yes" : "no"),
|
||||
ctx->return_code);
|
||||
audit_log_format(ab,
|
||||
" items=%d"
|
||||
" ppid=%d pid=%d uid=%u gid=%u euid=%u suid=%u"
|
||||
" fsuid=%u egid=%u sgid=%u fsgid=%u",
|
||||
ctx->name_count,
|
||||
task_ppid_nr(current), task_tgid_nr(current),
|
||||
from_kuid(&init_user_ns, cred->uid),
|
||||
from_kgid(&init_user_ns, cred->gid),
|
||||
from_kuid(&init_user_ns, cred->euid),
|
||||
from_kuid(&init_user_ns, cred->suid),
|
||||
from_kuid(&init_user_ns, cred->fsuid),
|
||||
from_kgid(&init_user_ns, cred->egid),
|
||||
from_kgid(&init_user_ns, cred->sgid),
|
||||
from_kgid(&init_user_ns, cred->fsgid));
|
||||
audit_log_task_context(ab);
|
||||
audit_log_key(ab, ctx->filterkey);
|
||||
audit_log_end(ab);
|
||||
}
|
||||
|
||||
static void audit_log_exit(void)
|
||||
{
|
||||
int i, call_panic = 0;
|
||||
@ -1581,6 +1645,9 @@ static void audit_log_exit(void)
|
||||
audit_log_key(ab, context->filterkey);
|
||||
audit_log_end(ab);
|
||||
break;
|
||||
case AUDIT_CTX_URING:
|
||||
audit_log_uring(context);
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
break;
|
||||
@ -1751,6 +1818,105 @@ static void audit_return_fixup(struct audit_context *ctx,
|
||||
ctx->return_valid = (success ? AUDITSC_SUCCESS : AUDITSC_FAILURE);
|
||||
}
|
||||
|
||||
/**
|
||||
* __audit_uring_entry - prepare the kernel task's audit context for io_uring
|
||||
* @op: the io_uring opcode
|
||||
*
|
||||
* This is similar to audit_syscall_entry() but is intended for use by io_uring
|
||||
* operations. This function should only ever be called from
|
||||
* audit_uring_entry() as we rely on the audit context checking present in that
|
||||
* function.
|
||||
*/
|
||||
void __audit_uring_entry(u8 op)
|
||||
{
|
||||
struct audit_context *ctx = audit_context();
|
||||
|
||||
if (ctx->state == AUDIT_STATE_DISABLED)
|
||||
return;
|
||||
|
||||
/*
|
||||
* NOTE: It's possible that we can be called from the process' context
|
||||
* before it returns to userspace, and before audit_syscall_exit()
|
||||
* is called. In this case there is not much to do, just record
|
||||
* the io_uring details and return.
|
||||
*/
|
||||
ctx->uring_op = op;
|
||||
if (ctx->context == AUDIT_CTX_SYSCALL)
|
||||
return;
|
||||
|
||||
ctx->dummy = !audit_n_rules;
|
||||
if (!ctx->dummy && ctx->state == AUDIT_STATE_BUILD)
|
||||
ctx->prio = 0;
|
||||
|
||||
ctx->context = AUDIT_CTX_URING;
|
||||
ctx->current_state = ctx->state;
|
||||
ktime_get_coarse_real_ts64(&ctx->ctime);
|
||||
}
|
||||
|
||||
/**
|
||||
* __audit_uring_exit - wrap up the kernel task's audit context after io_uring
|
||||
* @success: true/false value to indicate if the operation succeeded or not
|
||||
* @code: operation return code
|
||||
*
|
||||
* This is similar to audit_syscall_exit() but is intended for use by io_uring
|
||||
* operations. This function should only ever be called from
|
||||
* audit_uring_exit() as we rely on the audit context checking present in that
|
||||
* function.
|
||||
*/
|
||||
void __audit_uring_exit(int success, long code)
|
||||
{
|
||||
struct audit_context *ctx = audit_context();
|
||||
|
||||
/*
|
||||
* TODO: At some point we will likely want to filter on io_uring ops
|
||||
* and other things similar to what we do for syscalls, but that
|
||||
* is something for another day; just record what we can here.
|
||||
*/
|
||||
|
||||
if (ctx->context == AUDIT_CTX_SYSCALL) {
|
||||
/*
|
||||
* NOTE: See the note in __audit_uring_entry() about the case
|
||||
* where we may be called from process context before we
|
||||
* return to userspace via audit_syscall_exit(). In this
|
||||
* case we simply emit a URINGOP record and bail, the
|
||||
* normal syscall exit handling will take care of
|
||||
* everything else.
|
||||
* It is also worth mentioning that when we are called,
|
||||
* the current process creds may differ from the creds
|
||||
* used during the normal syscall processing; keep that
|
||||
* in mind if/when we move the record generation code.
|
||||
*/
|
||||
|
||||
/*
|
||||
* We need to filter on the syscall info here to decide if we
|
||||
* should emit a URINGOP record. I know it seems odd but this
|
||||
* solves the problem where users have a filter to block *all*
|
||||
* syscall records in the "exit" filter; we want to preserve
|
||||
* the behavior here.
|
||||
*/
|
||||
audit_filter_syscall(current, ctx);
|
||||
audit_filter_inodes(current, ctx);
|
||||
if (ctx->current_state != AUDIT_STATE_RECORD)
|
||||
return;
|
||||
|
||||
audit_log_uring(ctx);
|
||||
return;
|
||||
}
|
||||
|
||||
/* this may generate CONFIG_CHANGE records */
|
||||
if (!list_empty(&ctx->killed_trees))
|
||||
audit_kill_trees(ctx);
|
||||
|
||||
audit_filter_inodes(current, ctx);
|
||||
if (ctx->current_state != AUDIT_STATE_RECORD)
|
||||
goto out;
|
||||
audit_return_fixup(ctx, success, code);
|
||||
audit_log_exit();
|
||||
|
||||
out:
|
||||
audit_reset_context(ctx);
|
||||
}
|
||||
|
||||
/**
|
||||
* __audit_syscall_entry - fill in an audit record at syscall entry
|
||||
* @major: major syscall type (function)
|
||||
|
Loading…
Reference in New Issue
Block a user