io_uring: force creation of separate context for ATTACH_WQ and non-threads

Earlier kernels had SQPOLL threads that could share across anything, as
we grabbed the context we needed on a per-ring basis. This is no longer
the case, so only allow attaching directly if we're in the same thread
group. That is the common use case. For non-group tasks, just set up a
new context and thread, as we would've done if sharing hadn't been
requested. This isn't 100% ideal in terms of CPU utilization for the
fork-then-share case, but hopefully that isn't much of a concern. If it
is, there are plans in motion for how to improve that. Most importantly,
we want to avoid app-side regressions where sharing worked before and
now doesn't.
With this patch, functionality is equivalent to previous kernels that
supported IORING_SETUP_ATTACH_WQ with SQPOLL.

Reported-by: Stefan Metzmacher <metze@samba.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
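For context, the sharing behavior this patch preserves is driven from
userspace via IORING_SETUP_ATTACH_WQ. Below is a minimal sketch, assuming
liburing; ring sizes and the idle timeout are illustrative, and SQPOLL on
kernels of this era requires root or CAP_SYS_NICE. The second ring attaches
to the first ring's SQPOLL backend through wq_fd. With this patch, an
attach from outside the creator's thread group no longer fails outright;
the kernel quietly sets up a fresh SQPOLL context instead.

/*
 * Hedged sketch, not from the patch: share one SQPOLL thread between
 * two rings created in the same thread group.
 */
#include <liburing.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	struct io_uring a, b;
	struct io_uring_params pa, pb;
	int ret;

	memset(&pa, 0, sizeof(pa));
	pa.flags = IORING_SETUP_SQPOLL;
	pa.sq_thread_idle = 2000;	/* ms before the sqpoll thread idles */
	ret = io_uring_queue_init_params(8, &a, &pa);
	if (ret < 0) {
		fprintf(stderr, "ring a: %s\n", strerror(-ret));
		return 1;
	}

	/* second ring shares the first ring's SQPOLL thread */
	memset(&pb, 0, sizeof(pb));
	pb.flags = IORING_SETUP_SQPOLL | IORING_SETUP_ATTACH_WQ;
	pb.wq_fd = a.ring_fd;
	ret = io_uring_queue_init_params(8, &b, &pb);
	if (ret < 0) {
		fprintf(stderr, "ring b: %s\n", strerror(-ret));
		io_uring_queue_exit(&a);
		return 1;
	}

	io_uring_queue_exit(&b);
	io_uring_queue_exit(&a);
	return 0;
}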
@@ -269,6 +269,7 @@ struct io_sq_data {
 	unsigned		sq_thread_idle;
 	int			sq_cpu;
 	pid_t			task_pid;
+	pid_t			task_tgid;
 
 	unsigned long		state;
 	struct completion	startup;
@@ -7112,6 +7113,10 @@ static struct io_sq_data *io_attach_sq_data(struct io_uring_params *p)
 		fdput(f);
 		return ERR_PTR(-EINVAL);
 	}
+	if (sqd->task_tgid != current->tgid) {
+		fdput(f);
+		return ERR_PTR(-EPERM);
+	}
 
 	refcount_inc(&sqd->refs);
 	fdput(f);
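The gate added above keys off the thread group id: POSIX threads share
their creator's tgid, while fork()ed children get their own, so only the
former may attach directly. A small illustrative sketch (not from the
patch) of that distinction, using the raw gettid syscall:

/* Illustrative only: threads share a tgid, forked children do not. */
#include <stdio.h>
#include <pthread.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/wait.h>

static void *thread_fn(void *arg)
{
	/* same thread group: getpid() here equals the creator's */
	printf("thread:  pid(tgid)=%d tid=%ld\n", getpid(),
	       (long)syscall(SYS_gettid));
	return NULL;
}

int main(void)
{
	pthread_t t;

	printf("creator: pid(tgid)=%d tid=%ld\n", getpid(),
	       (long)syscall(SYS_gettid));
	pthread_create(&t, NULL, thread_fn, NULL);
	pthread_join(t, NULL);

	if (fork() == 0) {
		/* new thread group: an attach would now draw -EPERM */
		printf("child:   pid(tgid)=%d\n", getpid());
		_exit(0);
	}
	wait(NULL);
	return 0;
}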
@@ -7122,8 +7127,14 @@ static struct io_sq_data *io_get_sq_data(struct io_uring_params *p)
 {
 	struct io_sq_data *sqd;
 
-	if (p->flags & IORING_SETUP_ATTACH_WQ)
-		return io_attach_sq_data(p);
+	if (p->flags & IORING_SETUP_ATTACH_WQ) {
+		sqd = io_attach_sq_data(p);
+		if (!IS_ERR(sqd))
+			return sqd;
+		/* fall through for EPERM case, setup new sqd/task */
+		if (PTR_ERR(sqd) != -EPERM)
+			return sqd;
+	}
 
 	sqd = kzalloc(sizeof(*sqd), GFP_KERNEL);
 	if (!sqd)
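The fall-through above relies on the kernel's error-pointer convention:
a negative errno is encoded directly in the returned pointer, using the
top 4095 values of the address space, so PTR_ERR() can recover it and
only -EPERM (the tgid mismatch) proceeds to allocate a fresh sqd. A
condensed sketch of the relevant helpers from include/linux/err.h,
shown for reference:

/* Condensed from include/linux/err.h; annotations omitted. */
#define MAX_ERRNO	4095
#define IS_ERR_VALUE(x)	((unsigned long)(x) >= (unsigned long)-MAX_ERRNO)

static inline void *ERR_PTR(long error)
{
	return (void *)error;
}

static inline long PTR_ERR(const void *ptr)
{
	return (long)ptr;
}

static inline bool IS_ERR(const void *ptr)
{
	return IS_ERR_VALUE((unsigned long)ptr);
}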
@@ -7833,6 +7844,7 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
 		}
 
 		sqd->task_pid = current->pid;
+		sqd->task_tgid = current->tgid;
 		tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE);
 		if (IS_ERR(tsk)) {
 			ret = PTR_ERR(tsk);